{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "c9b925e2-4e54-4467-a62c-c5c3c0c03a9f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-28T08:14:20.414397Z",
     "start_time": "2025-04-28T08:14:20.410905Z"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np     #只需要下载numpy库即可\n",
    "import random\n",
    "import GridWorld_v2\n",
    "import time\n",
    "from IPython.display import clear_output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "80940075-9a39-4ccc-a524-db6fb8a04e6e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-28T08:14:20.418023Z",
     "start_time": "2025-04-28T08:14:20.414923Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "⬜️⬜️⬜️⬜️⬜️\n",
      "⬜️🚫🚫⬜️⬜️\n",
      "⬜️⬜️🚫⬜️⬜️\n",
      "⬜️🚫✅🚫⬜️\n",
      "⬜️🚫⬜️⬜️⬜️\n"
     ]
    }
   ],
   "source": [
    "gamma = 0.9   # discount factor; the closer to 0, the more short-sighted the agent\n",
    "\n",
    "# Grid layout, one string per row. Other layouts that were tried:\n",
    "#   desc = [\".#\",\".T\"]              # example 4-1 from Prof. Zhao's course\n",
    "#   desc = [\"##.T\",\"...#\",\"....\"]   # arbitrary example\n",
    "# A random map can be requested instead of a fixed layout (rows/columns must then be set by hand):\n",
    "#   GridWorld_v2.GridWorld_v2(rows=rows, columns=columns, forbiddenAreaNums=8, targetNums=2, seed=52, forbiddenAreaScore=-10)\n",
    "desc = [\".....\",\".##..\",\"..#..\",\".#T#.\",\".#...\"]\n",
    "\n",
    "# Derive the grid size from the layout so the two can never get out of sync.\n",
    "rows = len(desc)\n",
    "columns = len(desc[0])\n",
    "\n",
    "gridworld = GridWorld_v2.GridWorld_v2(forbiddenAreaScore=-10, score=1, desc=desc)\n",
    "gridworld.show()\n",
    "\n",
    "\n",
    "value = np.zeros(rows*columns)       # initial state values; any starting point works, zeros are fine\n",
    "qtable = np.zeros((rows*columns,5))  # only fixes the shape (states x 5 actions); the original note says the contents get overwritten\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b911a060-21d5-4de2-8a7e-83a82ddb7547",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-28T08:20:39.010351Z",
     "start_time": "2025-04-28T08:20:39.004457Z"
    }
   },
   "outputs": [],
   "source": [
    "def Expected_SARSA(gridworld:GridWorld_v2.GridWorld_v2,gamma = 0.99,trajectorySteps=-1, learning_rate=0.001, final_epsilon=0.01, num_episodes=600, init_state=10, target_state=17)->GridWorld_v2.GridWorld_v2:\n",
    "    \"\"\"\n",
    "    Train an epsilon-greedy policy on a grid world with Expected SARSA.\n",
    "\n",
    "    Every episode samples one trajectory with the current epsilon-greedy policy,\n",
    "    applies the Expected SARSA update to each sampled transition (walking the\n",
    "    trajectory backwards) and then makes the policy greedy with respect to the\n",
    "    updated Q-table. Visit counts, the greedy policy and the state values are\n",
    "    printed after every episode.\n",
    "\n",
    "    The grid size is read from the notebook-level globals `rows` and `columns`,\n",
    "    which must match the layout of `gridworld`.\n",
    "\n",
    "    Parameters:\n",
    "    gridworld (GridWorld_v2.GridWorld_v2): environment used to sample trajectories and to display the policy\n",
    "    gamma (float): discount factor; the closer to 0, the more short-sighted the agent\n",
    "    trajectorySteps (int): trajectory length; -1 means the episode runs until the target is reached,\n",
    "        any other value is forwarded as the number of steps to sample\n",
    "    learning_rate (float): step size applied to the TD error\n",
    "    final_epsilon (float): floor of the exploration rate. Epsilon starts at 0.5 and shrinks by 0.001\n",
    "        per episode until it reaches this value; with epsilon = 1 every action is equally likely,\n",
    "        with epsilon = 0 the policy is fully greedy\n",
    "    num_episodes (int): number of training episodes\n",
    "    init_state (int): flat index of the state every episode starts from\n",
    "    target_state (int): flat index of the target state, used for the extra terminal self-loop sample\n",
    "\n",
    "    Returns:\n",
    "    GridWorld_v2.GridWorld_v2: the `gridworld` object that was passed in\n",
    "\n",
    "    Raises:\n",
    "    ValueError: if `init_state` or `target_state` is outside the rows*columns state range\n",
    "    \"\"\"\n",
    "    n_states = rows * columns\n",
    "    n_actions = 5\n",
    "    if not (0 <= init_state < n_states and 0 <= target_state < n_states):\n",
    "        raise ValueError(f\"init_state and target_state must be in [0, {n_states})\")\n",
    "\n",
    "    action_value = np.zeros((n_states, n_actions))  # Q-table, one row per state\n",
    "    # Random one-hot starting policy: exactly one preferred action per state.\n",
    "    policy = np.eye(n_actions)[np.random.randint(0, n_actions, size=n_states)]\n",
    "    epsilon = 0.5\n",
    "    # Per the documented contract, only the open-ended mode (-1) stops at the target.\n",
    "    stop_when_reach_target = trajectorySteps == -1\n",
    "\n",
    "    for episode in range(num_episodes):\n",
    "        print(\"episode\",f\"{episode}/{num_episodes}\")\n",
    "        # Linear decay, clamped so epsilon never drops below final_epsilon.\n",
    "        epsilon = max(final_epsilon, epsilon - 0.001)\n",
    "\n",
    "        # p1: probability of the greedy action, p0: probability of each of the other four actions.\n",
    "        p1 = 1-epsilon * (4/5)\n",
    "        p0 = epsilon/5\n",
    "        print(\"p1\",p1,\"p0\",p0)\n",
    "        # Epsilon-greedy action distribution derived from the one-hot policy.\n",
    "        policy_epsilon = np.where(policy == 1, p1, p0)\n",
    "\n",
    "        # Number of times each state shows up in this episode's trajectory.\n",
    "        visit_count = np.zeros(n_states, dtype=int)\n",
    "\n",
    "        initAction = random.randint(0, n_actions - 1)\n",
    "        Trajectory = gridworld.getTrajectoryScore(nowState=init_state,\n",
    "                                                  action=initAction,\n",
    "                                                  policy=policy_epsilon,\n",
    "                                                  steps=trajectorySteps,\n",
    "                                                  stop_when_reach_target=stop_when_reach_target)\n",
    "        # Extra sample that keeps the agent on the target (action 4, reward 1),\n",
    "        # so the target state's own action value gets updated as well.\n",
    "        Trajectory.append((target_state,4,1,target_state,4))\n",
    "        print(\"trajectorySteps\",len(Trajectory))\n",
    "\n",
    "        # Each entry is (state, action, reward, next_state, next_action); replay it backwards.\n",
    "        for state, action, reward, next_state, _next_action in reversed(Trajectory):\n",
    "            visit_count[state] += 1\n",
    "            # Expected SARSA bootstraps from the expectation of Q(next_state, .) under the\n",
    "            # epsilon-greedy policy instead of from the single sampled next action.\n",
    "            expected_next_q = (action_value[next_state] * policy_epsilon[next_state]).sum()\n",
    "            td_target = reward + gamma * expected_next_q\n",
    "            action_value[state][action] += learning_rate * (td_target - action_value[state][action])\n",
    "\n",
    "        # Policy improvement: greedy one-hot policy from the updated Q-table.\n",
    "        policy = np.eye(n_actions)[np.argmax(action_value,axis=1)]\n",
    "        policy_epsilon = np.where(policy == 1, p1, p0)\n",
    "\n",
    "        # Visit counts laid out like the grid.\n",
    "        print(visit_count.reshape(rows, columns))\n",
    "\n",
    "        # State values under the epsilon-greedy policy; only used for display.\n",
    "        state_value = np.sum(policy_epsilon * action_value,axis=1)\n",
    "        mean_state_value = state_value.mean()\n",
    "\n",
    "        gridworld.showPolicy(policy)\n",
    "        print(np.round(state_value,decimals=4).reshape(rows, columns))\n",
    "        print(\"mean_state_value\", mean_state_value)\n",
    "\n",
    "    return gridworld"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "3ae4fea5-8d79-4dd0-bec1-2c9b4334d388",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-28T08:20:41.876771Z",
     "start_time": "2025-04-28T08:20:40.842168Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "episode 0/600\n",
      "p1 0.6008 p0 0.0998\n",
      "trajectorySteps 56\n",
      "[[26  1  1  0  0]\n",
      " [ 5  0  1  0  0]\n",
      " [ 2  0  1  2  4]\n",
      " [ 0  1  2  1  3]\n",
      " [ 0  1  1  1  3]]\n",
      "➡️⬆️⬆️⬆️⬆️\n",
      "➡️⏫️⏫️⬆️⬆️\n",
      "➡️⬆️⏫️⬆️⬆️\n",
      "⬆️⏩️✅⏫️⬆️\n",
      "⬆️⏩️⬆️⬆️⬇️\n",
      "[[-0.0015 -0.     -0.001   0.      0.    ]\n",
      " [-0.      0.     -0.001   0.      0.    ]\n",
      " [-0.      0.     -0.     -0.     -0.0001]\n",
      " [ 0.      0.0006 -0.0004 -0.     -0.001 ]\n",
      " [ 0.     -0.001  -0.001  -0.     -0.0001]]\n",
      "mean_state_value -0.00025958809591138473\n",
      "episode 1/600\n",
      "p1 0.6015999999999999 p0 0.0996\n",
      "trajectorySteps 1521\n",
      "[[ 41 264 231 319 428]\n",
      " [ 11  54  43  42  57]\n",
      " [  3   8   4   2   9]\n",
      " [  1   0   2   1   1]\n",
      " [  0   0   0   0   0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬅️⏪⏩️⬇️⬇️\n",
      "⬅️⬇️⏬⬆️⬅️\n",
      "➡️⏩️✅⏫️➡️\n",
      "⬆️⏩️⬆️⬆️⬇️\n",
      "[[-0.0024 -0.0451 -0.0398 -0.0184 -0.0305]\n",
      " [-0.007  -0.0112 -0.0111 -0.0061 -0.0009]\n",
      " [-0.     -0.008  -0.0024 -0.001  -0.0001]\n",
      " [-0.      0.0006  0.0002 -0.     -0.001 ]\n",
      " [ 0.     -0.001  -0.001  -0.     -0.0001]]\n",
      "mean_state_value -0.007445822371918201\n",
      "episode 2/600\n",
      "p1 0.6024 p0 0.0994\n",
      "trajectorySteps 35\n",
      "[[ 0  0  0  0  0]\n",
      " [11  0  0  0  0]\n",
      " [20  1  1  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "⬇️⬇️⏬⬆️⬅️\n",
      "➡️⏩️✅⏫️➡️\n",
      "⬆️⏩️⬆️⬆️⬇️\n",
      "[[-0.0024 -0.045  -0.0397 -0.0184 -0.0304]\n",
      " [-0.0075 -0.0112 -0.0111 -0.0061 -0.0009]\n",
      " [-0.0012 -0.0089 -0.0018 -0.001  -0.0001]\n",
      " [-0.      0.0006  0.0014 -0.     -0.001 ]\n",
      " [ 0.     -0.001  -0.001  -0.     -0.0001]]\n",
      "mean_state_value -0.0074662191975599455\n",
      "episode 3/600\n",
      "p1 0.6032 p0 0.0992\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [2 1 0 0 0]\n",
      " [4 0 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "🔄⬇️⏬⬆️⬅️\n",
      "⬇️⏩️✅⏫️➡️\n",
      "⬆️⏩️⬆️⬆️⬇️\n",
      "[[-0.0024 -0.0449 -0.0396 -0.0183 -0.0303]\n",
      " [-0.0084 -0.0112 -0.011  -0.0061 -0.0009]\n",
      " [-0.0013 -0.0089 -0.0018 -0.001  -0.0001]\n",
      " [-0.001   0.0012  0.0026 -0.     -0.001 ]\n",
      " [ 0.     -0.001  -0.001  -0.     -0.0001]]\n",
      "mean_state_value -0.007461813689377693\n",
      "episode 4/600\n",
      "p1 0.604 p0 0.099\n",
      "trajectorySteps 19\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 1 0 0 0]\n",
      " [4 1 2 0 0]\n",
      " [6 1 1 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬅️⏩️✅⏫️➡️\n",
      "⬇️⏬⬆️⬆️⬇️\n",
      "[[-0.0024 -0.0448 -0.0395 -0.0183 -0.0303]\n",
      " [-0.0084 -0.0111 -0.011  -0.0061 -0.0009]\n",
      " [-0.0013 -0.0099 -0.0018 -0.001  -0.0001]\n",
      " [-0.001   0.0012  0.0022 -0.     -0.001 ]\n",
      " [-0.0011 -0.001  -0.0004 -0.     -0.0001]]\n",
      "mean_state_value -0.007520669831503567\n",
      "episode 5/600\n",
      "p1 0.6048 p0 0.0988\n",
      "trajectorySteps 23\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [4 2 0 0 0]\n",
      " [8 2 2 0 0]\n",
      " [3 1 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "🔄⬅️⏬⬆️⬅️\n",
      "⬆️⏩️✅⏫️➡️\n",
      "⬆️⏬⬆️⬆️⬇️\n",
      "[[-0.0024 -0.0447 -0.0394 -0.0182 -0.0302]\n",
      " [-0.0084 -0.0111 -0.011  -0.0061 -0.0009]\n",
      " [-0.0013 -0.0109 -0.0018 -0.001  -0.0001]\n",
      " [-0.0015  0.0018  0.0019 -0.     -0.001 ]\n",
      " [-0.0022 -0.002  -0.0004 -0.     -0.0001]]\n",
      "mean_state_value -0.007638304732830976\n",
      "episode 6/600\n",
      "p1 0.6055999999999999 p0 0.0986\n",
      "trajectorySteps 39\n",
      "[[3 0 0 0 0]\n",
      " [4 3 1 4 0]\n",
      " [9 2 1 4 5]\n",
      " [1 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬🔄🔄\n",
      "⬆️⏩️✅⏫️➡️\n",
      "⬆️⏬⬆️⬆️⬇️\n",
      "[[-0.0024 -0.0446 -0.0394 -0.0182 -0.0302]\n",
      " [-0.0095 -0.0131 -0.011  -0.006  -0.0009]\n",
      " [-0.0013 -0.0118 -0.0011 -0.002  -0.0003]\n",
      " [-0.0015  0.0018  0.0025 -0.     -0.001 ]\n",
      " [-0.0022 -0.002  -0.0004 -0.     -0.0001]]\n",
      "mean_state_value -0.007782730567220948\n",
      "episode 7/600\n",
      "p1 0.6064 p0 0.0984\n",
      "trajectorySteps 98\n",
      "[[ 0  0  0  0  0]\n",
      " [ 6  2  0  0  0]\n",
      " [34  7  0  0  0]\n",
      " [23  6  2  0  0]\n",
      " [12  6  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬🔄🔄\n",
      "🔄⏩️✅⏫️➡️\n",
      "🔄⏩️⬆️⬆️⬇️\n",
      "[[-0.0024 -0.0445 -0.0393 -0.0182 -0.0301]\n",
      " [-0.0104 -0.0131 -0.011  -0.006  -0.0009]\n",
      " [-0.0018 -0.0147 -0.0011 -0.002  -0.0003]\n",
      " [-0.0045  0.0005  0.0037 -0.     -0.001 ]\n",
      " [-0.0034 -0.005  -0.0004 -0.     -0.0001]]\n",
      "mean_state_value -0.008241215239730615\n",
      "episode 8/600\n",
      "p1 0.6072 p0 0.0982\n",
      "trajectorySteps 10\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 1 1 0 0]\n",
      " [4 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬🔄🔄\n",
      "🔄⏩️✅⏫️➡️\n",
      "🔄⏩️⬆️⬆️⬇️\n",
      "[[-0.0024 -0.0445 -0.0392 -0.0181 -0.03  ]\n",
      " [-0.0104 -0.013  -0.0109 -0.006  -0.0009]\n",
      " [-0.0018 -0.0157 -0.0005 -0.002  -0.0003]\n",
      " [-0.0056  0.0005  0.0049 -0.     -0.001 ]\n",
      " [-0.0033 -0.005  -0.0004 -0.     -0.0001]]\n",
      "mean_state_value -0.00823306591225151\n",
      "episode 9/600\n",
      "p1 0.608 p0 0.098\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬🔄🔄\n",
      "⬆️⏩️✅⏫️➡️\n",
      "🔄⏩️⬆️⬆️⬇️\n",
      "[[-0.0024 -0.0444 -0.0391 -0.0181 -0.03  ]\n",
      " [-0.0104 -0.013  -0.0109 -0.006  -0.0009]\n",
      " [-0.0019 -0.0156 -0.0005 -0.002  -0.0003]\n",
      " [-0.0066  0.0011  0.0046 -0.     -0.001 ]\n",
      " [-0.0033 -0.005  -0.0004 -0.     -0.0001]]\n",
      "mean_state_value -0.008248344394588665\n",
      "episode 10/600\n",
      "p1 0.6088 p0 0.0978\n",
      "trajectorySteps 30\n",
      "[[ 0  0  0  0  0]\n",
      " [ 3  1  0  0  0]\n",
      " [21  1  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬🔄🔄\n",
      "⬆️⏩️✅⏫️➡️\n",
      "🔄⏩️⬆️⬆️⬇️\n",
      "[[-0.0024 -0.0443 -0.039  -0.0181 -0.0299]\n",
      " [-0.0104 -0.013  -0.0109 -0.006  -0.0009]\n",
      " [-0.0022 -0.0166 -0.0005 -0.002  -0.0003]\n",
      " [-0.0075  0.0017  0.0042 -0.     -0.001 ]\n",
      " [-0.0033 -0.005  -0.0004 -0.     -0.0001]]\n",
      "mean_state_value -0.008310709638024162\n",
      "episode 11/600\n",
      "p1 0.6095999999999999 p0 0.09759999999999999\n",
      "trajectorySteps 124\n",
      "[[ 0  0  0  0  0]\n",
      " [15  0  0  0  0]\n",
      " [90  5  0  0  0]\n",
      " [ 9  1  2  0  0]\n",
      " [ 0  1  1  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️🔄⏬🔄🔄\n",
      "⬇️⏩️✅⏫️➡️\n",
      "🔄⏩️⬆️⬆️⬇️\n",
      "[[-0.0024 -0.0442 -0.039  -0.018  -0.0299]\n",
      " [-0.0106 -0.013  -0.0109 -0.006  -0.0009]\n",
      " [-0.003  -0.0166 -0.0005 -0.002  -0.0003]\n",
      " [-0.0085  0.0007  0.0054 -0.     -0.001 ]\n",
      " [-0.0033 -0.005   0.0002 -0.     -0.0001]]\n",
      "mean_state_value -0.008339242908031638\n",
      "episode 12/600\n",
      "p1 0.6104 p0 0.0974\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️🔄⏬🔄🔄\n",
      "⬇️⏩️✅⏫️➡️\n",
      "🔄⏩️⬆️⬆️⬇️\n",
      "[[-0.0024 -0.0441 -0.0389 -0.018  -0.0298]\n",
      " [-0.0106 -0.0129 -0.0108 -0.006  -0.0009]\n",
      " [-0.003  -0.0165 -0.0005 -0.0019 -0.0003]\n",
      " [-0.0094  0.0013  0.0067 -0.     -0.001 ]\n",
      " [-0.0033 -0.005   0.0003 -0.     -0.0001]]\n",
      "mean_state_value -0.008286116822177168\n",
      "episode 13/600\n",
      "p1 0.6112 p0 0.0972\n",
      "trajectorySteps 69\n",
      "[[27  5  2  2  0]\n",
      " [ 2  2  2  4  2]\n",
      " [ 1  1  1  5 11]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "⬇️🔄⏬➡️🔄\n",
      "⬇️⏩️✅⏫️➡️\n",
      "🔄⏩️⬆️⬆️⬇️\n",
      "[[-0.0029 -0.045  -0.0398 -0.018  -0.0297]\n",
      " [-0.0106 -0.0129 -0.0118 -0.006  -0.0009]\n",
      " [-0.003  -0.0175  0.0001 -0.0029 -0.0005]\n",
      " [-0.0094  0.0014  0.0079 -0.     -0.001 ]\n",
      " [-0.0033 -0.005   0.0003 -0.     -0.0001]]\n",
      "mean_state_value -0.008416915540301138\n",
      "episode 14/600\n",
      "p1 0.612 p0 0.097\n",
      "trajectorySteps 15\n",
      "[[0 0 0 0 0]\n",
      " [0 2 0 0 0]\n",
      " [3 6 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏫️➡️\n",
      "🔄⏩️⬆️⬆️⬇️\n",
      "[[-0.0029 -0.0449 -0.0397 -0.0179 -0.0297]\n",
      " [-0.0106 -0.0129 -0.0118 -0.006  -0.0009]\n",
      " [-0.003  -0.0203  0.0002 -0.0029 -0.0005]\n",
      " [-0.0094  0.002   0.0091 -0.     -0.001 ]\n",
      " [-0.0033 -0.0049  0.0003 -0.     -0.0001]]\n",
      "mean_state_value -0.008440492478784492\n",
      "episode 15/600\n",
      "p1 0.6128 p0 0.0968\n",
      "trajectorySteps 132\n",
      "[[86 10  3  0  0]\n",
      " [10  2  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [ 1  0  2  0  0]\n",
      " [14  1  1  0  0]]\n",
      "⬇️⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏫️➡️\n",
      "⬆️⏩️⬆️⬆️⬇️\n",
      "[[-0.0045 -0.0458 -0.0397 -0.0179 -0.0296]\n",
      " [-0.0106 -0.0138 -0.0117 -0.0059 -0.0009]\n",
      " [-0.003  -0.0203  0.0002 -0.0029 -0.0005]\n",
      " [-0.0094  0.002   0.0104 -0.     -0.001 ]\n",
      " [-0.0049 -0.0049  0.0009 -0.     -0.0001]]\n",
      "mean_state_value -0.008554714743441081\n",
      "episode 16/600\n",
      "p1 0.6135999999999999 p0 0.09659999999999999\n",
      "trajectorySteps 31\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [11  2  2  0  0]\n",
      " [15  0  0  0  0]]\n",
      "⬇️⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏫️➡️\n",
      "🔄⏩️⬆️⬆️⬇️\n",
      "[[-0.0044 -0.0457 -0.0396 -0.0178 -0.0296]\n",
      " [-0.0105 -0.0138 -0.0117 -0.0059 -0.0009]\n",
      " [-0.003  -0.0202  0.0002 -0.0029 -0.0005]\n",
      " [-0.0103  0.0017  0.0116 -0.     -0.001 ]\n",
      " [-0.0052 -0.0049  0.0009 -0.     -0.0001]]\n",
      "mean_state_value -0.00855120217593873\n",
      "episode 17/600\n",
      "p1 0.6144000000000001 p0 0.0964\n",
      "trajectorySteps 19\n",
      "[[0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [9 0 0 0 0]\n",
      " [5 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏫️➡️\n",
      "🔄⏩️⬆️⬆️⬇️\n",
      "[[-0.0044 -0.0456 -0.0395 -0.0178 -0.0295]\n",
      " [-0.0105 -0.0138 -0.0117 -0.0059 -0.0009]\n",
      " [-0.0032 -0.0202  0.0002 -0.0029 -0.0005]\n",
      " [-0.0114  0.0023  0.0122 -0.     -0.001 ]\n",
      " [-0.0052 -0.0049  0.0009 -0.     -0.0001]]\n",
      "mean_state_value -0.008535388385896556\n",
      "episode 18/600\n",
      "p1 0.6152 p0 0.0962\n",
      "trajectorySteps 68\n",
      "[[ 0  0  0  0  0]\n",
      " [ 4  1  0  0  0]\n",
      " [48  4  0  0  0]\n",
      " [ 8  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️⬅️🔄⬇️⬇️\n",
      "🔄⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏫️➡️\n",
      "🔄⏩️⬆️⬆️⬇️\n",
      "[[-0.0044 -0.0455 -0.0394 -0.0178 -0.0294]\n",
      " [-0.0105 -0.0138 -0.0117 -0.0059 -0.0009]\n",
      " [-0.0036 -0.0211  0.0002 -0.0029 -0.0005]\n",
      " [-0.0125  0.0029  0.0135 -0.     -0.001 ]\n",
      " [-0.0051 -0.0049  0.0009 -0.     -0.0001]]\n",
      "mean_state_value -0.00853935508623854\n",
      "episode 19/600\n",
      "p1 0.616 p0 0.096\n",
      "trajectorySteps 14\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [4 0 0 0 0]\n",
      " [3 0 2 0 0]\n",
      " [3 1 1 0 0]]\n",
      "⬇️⬅️🔄⬇️⬇️\n",
      "🔄⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏫️➡️\n",
      "🔄⏩️⬆️⬆️⬇️\n",
      "[[-0.0044 -0.0454 -0.0394 -0.0177 -0.0294]\n",
      " [-0.0105 -0.0137 -0.0116 -0.0059 -0.0009]\n",
      " [-0.0036 -0.0211  0.0002 -0.0029 -0.0005]\n",
      " [-0.0126  0.0029  0.0132 -0.     -0.001 ]\n",
      " [-0.0063 -0.0049  0.0015 -0.     -0.0001]]\n",
      "mean_state_value -0.008557419859858608\n",
      "episode 20/600\n",
      "p1 0.6168 p0 0.0958\n",
      "trajectorySteps 79\n",
      "[[ 3  0  0  0  0]\n",
      " [11  1  0  0  0]\n",
      " [17  2  0  0  0]\n",
      " [ 9  0  2  0  0]\n",
      " [32  1  1  0  0]]\n",
      "⬇️⬅️🔄⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏫️➡️\n",
      "⬆️⏩️⬆️⬆️⬇️\n",
      "[[-0.0045 -0.0453 -0.0393 -0.0177 -0.0293]\n",
      " [-0.0117 -0.0137 -0.0116 -0.0059 -0.0009]\n",
      " [-0.0043 -0.021   0.0002 -0.0029 -0.0005]\n",
      " [-0.0126  0.003   0.0144 -0.     -0.001 ]\n",
      " [-0.008  -0.0049  0.0021 -0.     -0.0001]]\n",
      "mean_state_value -0.00861916848014734\n",
      "episode 21/600\n",
      "p1 0.6175999999999999 p0 0.09559999999999999\n",
      "trajectorySteps 54\n",
      "[[ 1  1  0  0  0]\n",
      " [ 4  1  0  0  0]\n",
      " [36  5  1  0  0]\n",
      " [ 3  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏫️➡️\n",
      "⬆️⏩️⬆️⬆️⬇️\n",
      "[[-0.0045 -0.0462 -0.0392 -0.0177 -0.0292]\n",
      " [-0.0117 -0.0137 -0.0116 -0.0059 -0.0009]\n",
      " [-0.0046 -0.0219  0.0008 -0.0029 -0.0005]\n",
      " [-0.0127  0.003   0.0141 -0.     -0.001 ]\n",
      " [-0.008  -0.0049  0.0022 -0.     -0.0001]]\n",
      "mean_state_value -0.008679118353472834\n",
      "episode 22/600\n",
      "p1 0.6184000000000001 p0 0.0954\n",
      "trajectorySteps 16\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [5 1 0 0 0]\n",
      " [5 1 2 0 0]\n",
      " [2 0 0 0 0]]\n",
      "⬇️⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏫️➡️\n",
      "⬆️⏩️⬆️⬆️⬇️\n",
      "[[-0.0045 -0.0461 -0.0391 -0.0176 -0.0292]\n",
      " [-0.0117 -0.0137 -0.0116 -0.0059 -0.0009]\n",
      " [-0.0046 -0.0219  0.0009 -0.0029 -0.0005]\n",
      " [-0.0136  0.0036  0.0138 -0.     -0.001 ]\n",
      " [-0.008  -0.0049  0.0022 -0.     -0.0001]]\n",
      "mean_state_value -0.008684385479630302\n",
      "episode 23/600\n",
      "p1 0.6192 p0 0.09519999999999999\n",
      "trajectorySteps 55\n",
      "[[ 2  0  0  1  0]\n",
      " [ 5  1  2  7  8]\n",
      " [ 3  0  0  0 10]\n",
      " [ 0  0  2  0 10]\n",
      " [ 0  0  1  1  2]]\n",
      "⬇️⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏫️🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0045 -0.046  -0.039  -0.0176 -0.0291]\n",
      " [-0.0127 -0.0146 -0.0116 -0.0068 -0.001 ]\n",
      " [-0.0046 -0.0219  0.0009 -0.0029 -0.0006]\n",
      " [-0.0136  0.0036  0.0151 -0.     -0.0017]\n",
      " [-0.008  -0.0048  0.0028  0.     -0.0002]]\n",
      "mean_state_value -0.008749235700686937\n",
      "episode 24/600\n",
      "p1 0.62 p0 0.095\n",
      "trajectorySteps 8\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [3 1 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏫️🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0045 -0.0459 -0.039  -0.0176 -0.0291]\n",
      " [-0.0127 -0.0145 -0.0115 -0.0068 -0.001 ]\n",
      " [-0.0047 -0.0228  0.0009 -0.0029 -0.0006]\n",
      " [-0.0136  0.0043  0.0163 -0.     -0.0017]\n",
      " [-0.008  -0.0048  0.0028  0.     -0.0002]]\n",
      "mean_state_value -0.008694389773195182\n",
      "episode 25/600\n",
      "p1 0.6208 p0 0.0948\n",
      "trajectorySteps 98\n",
      "[[16  0  4  2  3]\n",
      " [20  3  1  5  5]\n",
      " [ 3  3  0  5 16]\n",
      " [ 0  0  2  1  6]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0049 -0.0458 -0.039  -0.0175 -0.0291]\n",
      " [-0.0157 -0.0155 -0.0115 -0.0068 -0.001 ]\n",
      " [-0.0047 -0.0227  0.0009 -0.0028 -0.0008]\n",
      " [-0.0136  0.0043  0.0176 -0.     -0.0028]\n",
      " [-0.008  -0.0048  0.0034  0.     -0.0002]]\n",
      "mean_state_value -0.008834246765033462\n",
      "episode 26/600\n",
      "p1 0.6215999999999999 p0 0.09459999999999999\n",
      "trajectorySteps 34\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [13  0  0  0  0]\n",
      " [15  3  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0049 -0.0457 -0.0389 -0.0175 -0.029 ]\n",
      " [-0.0156 -0.0154 -0.0115 -0.0068 -0.001 ]\n",
      " [-0.0048 -0.0227  0.0009 -0.0028 -0.0008]\n",
      " [-0.0155  0.004   0.0188 -0.     -0.0028]\n",
      " [-0.008  -0.0048  0.0034  0.     -0.0002]]\n",
      "mean_state_value -0.008858695287184346\n",
      "episode 27/600\n",
      "p1 0.6224000000000001 p0 0.0944\n",
      "trajectorySteps 5\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0049 -0.0456 -0.0388 -0.0175 -0.029 ]\n",
      " [-0.0156 -0.0154 -0.0115 -0.0067 -0.001 ]\n",
      " [-0.0048 -0.0226  0.0009 -0.0028 -0.0008]\n",
      " [-0.0164  0.0046  0.0201 -0.     -0.0028]\n",
      " [-0.0079 -0.0048  0.0034  0.     -0.0002]]\n",
      "mean_state_value -0.00879789234831241\n",
      "episode 28/600\n",
      "p1 0.6232 p0 0.09419999999999999\n",
      "trajectorySteps 13\n",
      "[[0 0 0 0 0]\n",
      " [0 1 0 0 0]\n",
      " [5 4 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬅️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0049 -0.0455 -0.0387 -0.0174 -0.0289]\n",
      " [-0.0156 -0.0154 -0.0114 -0.0067 -0.001 ]\n",
      " [-0.0048 -0.0244  0.0016 -0.0028 -0.0008]\n",
      " [-0.0164  0.0047  0.0198 -0.     -0.0028]\n",
      " [-0.0079 -0.0048  0.0035  0.     -0.0002]]\n",
      "mean_state_value -0.008836843443477788\n",
      "episode 29/600\n",
      "p1 0.624 p0 0.094\n",
      "trajectorySteps 8\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [1 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬅️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0049 -0.0454 -0.0387 -0.0174 -0.0288]\n",
      " [-0.0155 -0.0153 -0.0114 -0.0067 -0.001 ]\n",
      " [-0.0048 -0.0244  0.0016 -0.0028 -0.0008]\n",
      " [-0.0173  0.0053  0.0211 -0.     -0.0028]\n",
      " [-0.0079 -0.0048  0.0035  0.     -0.0002]]\n",
      "mean_state_value -0.008779498783645909\n",
      "episode 30/600\n",
      "p1 0.6248 p0 0.0938\n",
      "trajectorySteps 50\n",
      "[[ 0  0  0  0  0]\n",
      " [12  2  0  0  0]\n",
      " [10  0  0  0  0]\n",
      " [12  1  2  0  0]\n",
      " [11  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0049 -0.0453 -0.0386 -0.0173 -0.0288]\n",
      " [-0.0175 -0.0153 -0.0114 -0.0067 -0.001 ]\n",
      " [-0.0049 -0.0243  0.0016 -0.0028 -0.0008]\n",
      " [-0.0183  0.006   0.0208 -0.     -0.0028]\n",
      " [-0.0079 -0.0048  0.0035  0.     -0.0002]]\n",
      "mean_state_value -0.00886501332934902\n",
      "episode 31/600\n",
      "p1 0.6255999999999999 p0 0.09359999999999999\n",
      "trajectorySteps 10\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [5 1 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0048 -0.0452 -0.0385 -0.0173 -0.0287]\n",
      " [-0.0174 -0.0153 -0.0114 -0.0067 -0.001 ]\n",
      " [-0.0049 -0.0252  0.0016 -0.0028 -0.0008]\n",
      " [-0.0182  0.0066  0.0206 -0.     -0.0028]\n",
      " [-0.0079 -0.0048  0.0035  0.     -0.0002]]\n",
      "mean_state_value -0.008868637738847142\n",
      "episode 32/600\n",
      "p1 0.6264000000000001 p0 0.0934\n",
      "trajectorySteps 54\n",
      "[[ 0  0  0  0  0]\n",
      " [ 3  2  0  0  0]\n",
      " [34  7  2  0  0]\n",
      " [ 4  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0048 -0.0451 -0.0384 -0.0173 -0.0286]\n",
      " [-0.0183 -0.0153 -0.0113 -0.0067 -0.001 ]\n",
      " [-0.0051 -0.028   0.0022 -0.0028 -0.0008]\n",
      " [-0.0182  0.0066  0.0203 -0.     -0.0028]\n",
      " [-0.0079 -0.0048  0.0035  0.     -0.0002]]\n",
      "mean_state_value -0.008987331933308728\n",
      "episode 33/600\n",
      "p1 0.6272 p0 0.09319999999999999\n",
      "trajectorySteps 39\n",
      "[[ 9  1  0  0  0]\n",
      " [ 2  2  0  0  0]\n",
      " [13  9  0  0  0]\n",
      " [ 0  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0051 -0.045  -0.0383 -0.0172 -0.0286]\n",
      " [-0.0192 -0.0153 -0.0113 -0.0067 -0.001 ]\n",
      " [-0.0053 -0.0298  0.0022 -0.0028 -0.0008]\n",
      " [-0.0182  0.0073  0.02   -0.     -0.0028]\n",
      " [-0.0079 -0.0047  0.0035  0.     -0.0002]]\n",
      "mean_state_value -0.00908058265415852\n",
      "episode 34/600\n",
      "p1 0.628 p0 0.093\n",
      "trajectorySteps 85\n",
      "[[22  3  0  0  0]\n",
      " [10  4  2  1  5]\n",
      " [18  0  1  5  8]\n",
      " [ 1  0  2  0  3]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0053 -0.0449 -0.0383 -0.0172 -0.0285]\n",
      " [-0.0229 -0.0162 -0.0122 -0.0066 -0.0011]\n",
      " [-0.0055 -0.0297  0.0029 -0.0037 -0.0008]\n",
      " [-0.0181  0.0073  0.0213 -0.     -0.0029]\n",
      " [-0.0079 -0.0047  0.0035  0.     -0.0002]]\n",
      "mean_state_value -0.00927267901395925\n",
      "episode 35/600\n",
      "p1 0.6288 p0 0.0928\n",
      "trajectorySteps 141\n",
      "[[56  8  5  0  0]\n",
      " [11  1  3  3  5]\n",
      " [ 2  0  0  1 38]\n",
      " [ 0  0  2  0  2]\n",
      " [ 0  0  2  1  1]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0062 -0.0449 -0.0392 -0.0172 -0.0285]\n",
      " [-0.0239 -0.0161 -0.0122 -0.0085 -0.0011]\n",
      " [-0.0055 -0.0296  0.0029 -0.0037 -0.0009]\n",
      " [-0.0181  0.0073  0.0211 -0.     -0.003 ]\n",
      " [-0.0079 -0.0047  0.0041  0.     -0.0002]]\n",
      "mean_state_value -0.00943894392435985\n",
      "episode 36/600\n",
      "p1 0.6295999999999999 p0 0.09259999999999999\n",
      "trajectorySteps 15\n",
      "[[0 0 0 0 0]\n",
      " [0 3 0 0 0]\n",
      " [6 3 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0062 -0.0449 -0.0391 -0.0171 -0.0284]\n",
      " [-0.0238 -0.017  -0.0122 -0.0085 -0.0011]\n",
      " [-0.0056 -0.0323  0.0029 -0.0037 -0.0008]\n",
      " [-0.018   0.008   0.0224 -0.     -0.003 ]\n",
      " [-0.0078 -0.0047  0.0041  0.     -0.0002]]\n",
      "mean_state_value -0.009488228842180172\n",
      "episode 37/600\n",
      "p1 0.6304000000000001 p0 0.0924\n",
      "trajectorySteps 101\n",
      "[[ 3  0  0  0  0]\n",
      " [14  0  0  0  0]\n",
      " [71  2  0  0  0]\n",
      " [ 7  2  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬆️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0062 -0.0448 -0.039  -0.0171 -0.0283]\n",
      " [-0.0239 -0.017  -0.0122 -0.0084 -0.0011]\n",
      " [-0.0068 -0.0322  0.0029 -0.0037 -0.0008]\n",
      " [-0.019   0.0077  0.0237 -0.     -0.003 ]\n",
      " [-0.0078 -0.0047  0.0041  0.     -0.0002]]\n",
      "mean_state_value -0.009515842043334303\n",
      "episode 38/600\n",
      "p1 0.6312 p0 0.09219999999999999\n",
      "trajectorySteps 10\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [4 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0062 -0.0447 -0.0389 -0.017  -0.0283]\n",
      " [-0.0238 -0.017  -0.0121 -0.0084 -0.0011]\n",
      " [-0.0069 -0.0322  0.0029 -0.0037 -0.0008]\n",
      " [-0.0199  0.0084  0.0234 -0.     -0.0029]\n",
      " [-0.0078 -0.0047  0.0041  0.     -0.0002]]\n",
      "mean_state_value -0.009515648853776277\n",
      "episode 39/600\n",
      "p1 0.632 p0 0.092\n",
      "trajectorySteps 100\n",
      "[[56  2  0  0  0]\n",
      " [15  2  0  0  0]\n",
      " [12  5  0  0  0]\n",
      " [ 5  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0073 -0.0446 -0.0388 -0.017  -0.0282]\n",
      " [-0.0256 -0.017  -0.0121 -0.0084 -0.0011]\n",
      " [-0.0069 -0.0321  0.003  -0.0037 -0.0008]\n",
      " [-0.0208  0.0091  0.0247 -0.     -0.0029]\n",
      " [-0.0078 -0.0047  0.0041  0.     -0.0002]]\n",
      "mean_state_value -0.00957283090711545\n",
      "episode 40/600\n",
      "p1 0.6328 p0 0.09179999999999999\n",
      "trajectorySteps 62\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  1  0  0  0]\n",
      " [ 8  3  2  0  0]\n",
      " [47  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0073 -0.0445 -0.0388 -0.017  -0.0282]\n",
      " [-0.0256 -0.0169 -0.0121 -0.0084 -0.0011]\n",
      " [-0.0069 -0.033   0.003  -0.0037 -0.0008]\n",
      " [-0.0225  0.0097  0.026  -0.     -0.0029]\n",
      " [-0.0084 -0.0047  0.0041  0.     -0.0002]]\n",
      "mean_state_value -0.009603168464189989\n",
      "episode 41/600\n",
      "p1 0.6335999999999999 p0 0.09159999999999999\n",
      "trajectorySteps 5\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0073 -0.0444 -0.0387 -0.0169 -0.0281]\n",
      " [-0.0255 -0.0169 -0.0121 -0.0084 -0.0011]\n",
      " [-0.0069 -0.0329  0.003  -0.0037 -0.0008]\n",
      " [-0.0234  0.0104  0.0258 -0.     -0.0029]\n",
      " [-0.0084 -0.0047  0.0041  0.     -0.0002]]\n",
      "mean_state_value -0.0095960095210535\n",
      "episode 42/600\n",
      "p1 0.6344000000000001 p0 0.0914\n",
      "trajectorySteps 36\n",
      "[[ 0  0  0  0  0]\n",
      " [ 5  0  0  0  0]\n",
      " [17  0  0  0  0]\n",
      " [ 7  1  2  0  0]\n",
      " [ 4  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0073 -0.0443 -0.0386 -0.0169 -0.028 ]\n",
      " [-0.0255 -0.0169 -0.012  -0.0083 -0.0011]\n",
      " [-0.0072 -0.0328  0.003  -0.0037 -0.0008]\n",
      " [-0.0244  0.011   0.0256 -0.     -0.0029]\n",
      " [-0.0086 -0.0047  0.0041  0.     -0.0002]]\n",
      "mean_state_value -0.00961682021971907\n",
      "episode 43/600\n",
      "p1 0.6352 p0 0.09119999999999999\n",
      "trajectorySteps 105\n",
      "[[35  7  3  0  1]\n",
      " [13  0  2  1  2]\n",
      " [26  0  0  0  1]\n",
      " [ 3  0  2  0  1]\n",
      " [ 1  1  2  1  3]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏬⏩️➡️🔄\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.008  -0.0442 -0.0394 -0.0169 -0.028 ]\n",
      " [-0.0257 -0.0168 -0.0129 -0.0083 -0.0011]\n",
      " [-0.0074 -0.0328  0.003  -0.0037 -0.0008]\n",
      " [-0.0244  0.0111  0.0269 -0.     -0.0029]\n",
      " [-0.0086 -0.0046  0.0039  0.     -0.0003]]\n",
      "mean_state_value -0.009678397456771475\n",
      "episode 44/600\n",
      "p1 0.636 p0 0.091\n",
      "trajectorySteps 36\n",
      "[[ 0  0  0  0  0]\n",
      " [ 4  0  0  0  0]\n",
      " [26  1  0  0  0]\n",
      " [ 2  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏬⏩️➡️🔄\n",
      "⬇️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.008  -0.0441 -0.0394 -0.0168 -0.0279]\n",
      " [-0.0257 -0.0168 -0.0129 -0.0083 -0.0011]\n",
      " [-0.0078 -0.0327  0.003  -0.0036 -0.0008]\n",
      " [-0.0252  0.0117  0.0267 -0.     -0.0029]\n",
      " [-0.0085 -0.0046  0.0039  0.     -0.0003]]\n",
      "mean_state_value -0.00968591406463278\n",
      "episode 45/600\n",
      "p1 0.6368 p0 0.09079999999999999\n",
      "trajectorySteps 26\n",
      "[[ 1  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [10  0  0  0  0]\n",
      " [ 9  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏬⏩️➡️🔄\n",
      "➡️⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.008  -0.044  -0.0393 -0.0168 -0.0279]\n",
      " [-0.0256 -0.0168 -0.0129 -0.0083 -0.0011]\n",
      " [-0.0078 -0.0326  0.003  -0.0036 -0.0008]\n",
      " [-0.0261  0.0124  0.028  -0.     -0.0029]\n",
      " [-0.0085 -0.0046  0.0039  0.     -0.0003]]\n",
      "mean_state_value -0.009622779546880263\n",
      "episode 46/600\n",
      "p1 0.6376 p0 0.09059999999999999\n",
      "trajectorySteps 6\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [1 2 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏬⏩️➡️🔄\n",
      "➡️⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0079 -0.0439 -0.0392 -0.0168 -0.0278]\n",
      " [-0.0256 -0.0167 -0.0128 -0.0083 -0.0011]\n",
      " [-0.0078 -0.0325  0.0031 -0.0036 -0.0008]\n",
      " [-0.027   0.0122  0.0287 -0.     -0.0029]\n",
      " [-0.0085 -0.0046  0.0039  0.     -0.0003]]\n",
      "mean_state_value -0.009613276131927986\n",
      "episode 47/600\n",
      "p1 0.6384000000000001 p0 0.0904\n",
      "trajectorySteps 42\n",
      "[[14  2  0  0  0]\n",
      " [ 7  2  0  0  0]\n",
      " [ 7  6  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️🔄\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0083 -0.0438 -0.0391 -0.0167 -0.0277]\n",
      " [-0.0255 -0.0167 -0.0128 -0.0083 -0.0011]\n",
      " [-0.0079 -0.0343  0.0031 -0.0036 -0.0008]\n",
      " [-0.0278  0.0129  0.03   -0.     -0.0029]\n",
      " [-0.0085 -0.0046  0.0039  0.     -0.0003]]\n",
      "mean_state_value -0.00963561851339717\n",
      "episode 48/600\n",
      "p1 0.6392 p0 0.09019999999999999\n",
      "trajectorySteps 28\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 7  1  0  0  0]\n",
      " [ 3  0  2  0  0]\n",
      " [11  2  1  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️🔄\n",
      "🔄⬅️⏬➡️⬆️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0083 -0.0437 -0.039  -0.0167 -0.0277]\n",
      " [-0.0255 -0.0166 -0.0128 -0.0082 -0.0011]\n",
      " [-0.0079 -0.0342  0.0031 -0.0036 -0.0008]\n",
      " [-0.0278  0.0129  0.0313 -0.     -0.0029]\n",
      " [-0.0097 -0.0055  0.0046  0.     -0.0003]]\n",
      "mean_state_value -0.009617965896296012\n",
      "episode 49/600\n",
      "p1 0.64 p0 0.09\n",
      "trajectorySteps 26\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [12  3  0  0  0]\n",
      " [ 4  2  2  0  0]\n",
      " [ 2  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️🔄\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0083 -0.0436 -0.0389 -0.0166 -0.0276]\n",
      " [-0.0254 -0.0166 -0.0127 -0.0082 -0.0011]\n",
      " [-0.0081 -0.0341  0.0031 -0.0036 -0.0008]\n",
      " [-0.0295  0.0136  0.0326 -0.     -0.0029]\n",
      " [-0.0097 -0.0055  0.0046  0.     -0.0003]]\n",
      "mean_state_value -0.009591408986101222\n",
      "episode 50/600\n",
      "p1 0.6408 p0 0.08979999999999999\n",
      "trajectorySteps 28\n",
      "[[ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [18  1  1  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️🔄\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0083 -0.0435 -0.0388 -0.0166 -0.0275]\n",
      " [-0.0254 -0.0166 -0.0127 -0.0082 -0.0011]\n",
      " [-0.0081 -0.0341  0.0031 -0.0036 -0.0008]\n",
      " [-0.0295  0.0136  0.0325 -0.     -0.0029]\n",
      " [-0.011  -0.0055  0.0052  0.     -0.0003]]\n",
      "mean_state_value -0.00959935212584763\n",
      "episode 51/600\n",
      "p1 0.6416 p0 0.08959999999999999\n",
      "trajectorySteps 263\n",
      "[[77 10  4  1  0]\n",
      " [31  5  0  6 52]\n",
      " [45  6  0  2  8]\n",
      " [ 8  0  2  2  4]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0101 -0.0437 -0.0388 -0.0166 -0.0275]\n",
      " [-0.0284 -0.0184 -0.0127 -0.0082 -0.0016]\n",
      " [-0.0087 -0.034   0.0031 -0.0036 -0.0009]\n",
      " [-0.0294  0.0137  0.0323 -0.0002 -0.0039]\n",
      " [-0.011  -0.0054  0.0053  0.     -0.0003]]\n",
      "mean_state_value -0.009960979731402643\n",
      "episode 52/600\n",
      "p1 0.6424000000000001 p0 0.0894\n",
      "trajectorySteps 16\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [5 1 2 0 0]\n",
      " [6 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0101 -0.0436 -0.0388 -0.0165 -0.0274]\n",
      " [-0.0284 -0.0183 -0.0127 -0.0082 -0.0015]\n",
      " [-0.0087 -0.0339  0.0031 -0.0036 -0.0009]\n",
      " [-0.0303  0.0143  0.0336 -0.0002 -0.0039]\n",
      " [-0.0113 -0.0054  0.0053  0.     -0.0003]]\n",
      "mean_state_value -0.009907971949113721\n",
      "episode 53/600\n",
      "p1 0.6432 p0 0.08919999999999999\n",
      "trajectorySteps 52\n",
      "[[17  1  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [20  2  0  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [ 3  1  1  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0103 -0.0435 -0.0387 -0.0165 -0.0274]\n",
      " [-0.0283 -0.0183 -0.0126 -0.0082 -0.0015]\n",
      " [-0.0089 -0.0339  0.0031 -0.0036 -0.0009]\n",
      " [-0.0303  0.0144  0.0349 -0.0002 -0.0039]\n",
      " [-0.0122 -0.0054  0.0059  0.     -0.0003]]\n",
      "mean_state_value -0.009855763807493109\n",
      "episode 54/600\n",
      "p1 0.644 p0 0.089\n",
      "trajectorySteps 31\n",
      "[[ 0  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [18  2  0  0  0]\n",
      " [ 5  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0103 -0.0434 -0.0386 -0.0165 -0.0273]\n",
      " [-0.0283 -0.0182 -0.0126 -0.0081 -0.0015]\n",
      " [-0.0091 -0.0338  0.0032 -0.0036 -0.0009]\n",
      " [-0.0312  0.0151  0.0357 -0.0002 -0.0039]\n",
      " [-0.0122 -0.0054  0.006   0.     -0.0003]]\n",
      "mean_state_value -0.009823631763718796\n",
      "episode 55/600\n",
      "p1 0.6448 p0 0.08879999999999999\n",
      "trajectorySteps 20\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [8 1 2 0 0]\n",
      " [6 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0103 -0.0433 -0.0385 -0.0164 -0.0272]\n",
      " [-0.0282 -0.0182 -0.0126 -0.0081 -0.0015]\n",
      " [-0.0091 -0.0337  0.0032 -0.0036 -0.0009]\n",
      " [-0.0321  0.0158  0.0355 -0.0002 -0.0039]\n",
      " [-0.0123 -0.0054  0.006   0.     -0.0003]]\n",
      "mean_state_value -0.009817428792673152\n",
      "episode 56/600\n",
      "p1 0.6456 p0 0.08859999999999998\n",
      "trajectorySteps 50\n",
      "[[ 0  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [32  6  1  0  0]\n",
      " [ 5  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0103 -0.0432 -0.0384 -0.0164 -0.0272]\n",
      " [-0.0282 -0.0182 -0.0126 -0.0081 -0.0015]\n",
      " [-0.0093 -0.0346  0.0039 -0.0035 -0.0009]\n",
      " [-0.0331  0.0158  0.0368 -0.0002 -0.0039]\n",
      " [-0.0123 -0.0054  0.006   0.     -0.0003]]\n",
      "mean_state_value -0.009794822805476429\n",
      "episode 57/600\n",
      "p1 0.6464000000000001 p0 0.08839999999999999\n",
      "trajectorySteps 56\n",
      "[[ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [46  4  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬆️🔄⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0103 -0.0431 -0.0383 -0.0164 -0.0271]\n",
      " [-0.0282 -0.0181 -0.0125 -0.0081 -0.0015]\n",
      " [-0.0095 -0.0354  0.0039 -0.0035 -0.0009]\n",
      " [-0.033   0.0165  0.0382 -0.0002 -0.0039]\n",
      " [-0.0122 -0.0054  0.006   0.     -0.0003]]\n",
      "mean_state_value -0.009733875936989551\n",
      "episode 58/600\n",
      "p1 0.6472 p0 0.08819999999999999\n",
      "trajectorySteps 27\n",
      "[[0 0 0 0 0]\n",
      " [6 1 2 2 1]\n",
      " [6 0 0 1 2]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️🔄⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0102 -0.043  -0.0382 -0.0163 -0.0271]\n",
      " [-0.029  -0.019  -0.0125 -0.0089 -0.0015]\n",
      " [-0.0095 -0.0353  0.0039 -0.0035 -0.0009]\n",
      " [-0.033   0.0165  0.038  -0.0002 -0.0039]\n",
      " [-0.0122 -0.0054  0.0067  0.     -0.0003]]\n",
      "mean_state_value -0.009791916359356818\n",
      "episode 59/600\n",
      "p1 0.648 p0 0.088\n",
      "trajectorySteps 61\n",
      "[[35  5  0  0  0]\n",
      " [ 9  0  0  0  0]\n",
      " [ 4  4  1  0  0]\n",
      " [ 1  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0108 -0.0429 -0.0382 -0.0163 -0.027 ]\n",
      " [-0.0291 -0.0189 -0.0125 -0.0089 -0.0015]\n",
      " [-0.0096 -0.0361  0.0046 -0.0035 -0.0009]\n",
      " [-0.0329  0.0166  0.0394 -0.0002 -0.0039]\n",
      " [-0.0122 -0.0053  0.0067  0.     -0.0003]]\n",
      "mean_state_value -0.009746706363273872\n",
      "episode 60/600\n",
      "p1 0.6488 p0 0.08779999999999999\n",
      "trajectorySteps 33\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [15  6  0  0  0]\n",
      " [ 9  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0108 -0.0428 -0.0381 -0.0162 -0.0269]\n",
      " [-0.029  -0.0189 -0.0124 -0.0089 -0.0015]\n",
      " [-0.0097 -0.0361  0.0046 -0.0035 -0.0009]\n",
      " [-0.0339  0.0173  0.0407 -0.0002 -0.0039]\n",
      " [-0.0122 -0.0053  0.0067  0.     -0.0003]]\n",
      "mean_state_value -0.009683692642141003\n",
      "episode 61/600\n",
      "p1 0.6496 p0 0.08759999999999998\n",
      "trajectorySteps 34\n",
      "[[ 0  0  0  0  0]\n",
      " [ 6  0  0  0  0]\n",
      " [ 7  0  0  0  0]\n",
      " [ 3  0  2  0  0]\n",
      " [10  5  1  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0107 -0.0427 -0.038  -0.0162 -0.0269]\n",
      " [-0.0291 -0.0188 -0.0124 -0.0089 -0.0015]\n",
      " [-0.0097 -0.036   0.0046 -0.0035 -0.0009]\n",
      " [-0.0339  0.0173  0.0406 -0.0002 -0.0038]\n",
      " [-0.0149 -0.0063  0.0074  0.     -0.0003]]\n",
      "mean_state_value -0.009794552061473146\n",
      "episode 62/600\n",
      "p1 0.6504000000000001 p0 0.08739999999999999\n",
      "trajectorySteps 20\n",
      "[[0 0 0 0 0]\n",
      " [8 0 0 0 0]\n",
      " [7 0 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0107 -0.0426 -0.0379 -0.0162 -0.0268]\n",
      " [-0.0292 -0.0188 -0.0124 -0.0089 -0.0015]\n",
      " [-0.0098 -0.0359  0.0046 -0.0035 -0.0009]\n",
      " [-0.0346  0.018   0.0405 -0.0002 -0.0038]\n",
      " [-0.0148 -0.0063  0.0074  0.     -0.0003]]\n",
      "mean_state_value -0.009787731606195074\n",
      "episode 63/600\n",
      "p1 0.6512 p0 0.08719999999999999\n",
      "trajectorySteps 26\n",
      "[[4 1 5 0 3]\n",
      " [1 0 1 3 3]\n",
      " [1 0 0 1 0]\n",
      " [0 0 2 1 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0107 -0.0425 -0.0388 -0.0161 -0.0268]\n",
      " [-0.0292 -0.0187 -0.0124 -0.0088 -0.0015]\n",
      " [-0.0098 -0.0358  0.0046 -0.0044 -0.0009]\n",
      " [-0.0346  0.018   0.0403  0.0005 -0.0038]\n",
      " [-0.0148 -0.0063  0.0074  0.     -0.0003]]\n",
      "mean_state_value -0.0098117695987089\n",
      "episode 64/600\n",
      "p1 0.652 p0 0.087\n",
      "trajectorySteps 32\n",
      "[[1 2 4 1 0]\n",
      " [1 0 1 2 7]\n",
      " [4 1 1 1 4]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0107 -0.0424 -0.0396 -0.0161 -0.0268]\n",
      " [-0.0291 -0.0187 -0.0123 -0.0088 -0.0015]\n",
      " [-0.0098 -0.0357  0.0053 -0.0052 -0.0009]\n",
      " [-0.0345  0.0181  0.0417  0.0005 -0.0038]\n",
      " [-0.0148 -0.0062  0.0074  0.     -0.0003]]\n",
      "mean_state_value -0.00977193708271777\n",
      "episode 65/600\n",
      "p1 0.6528 p0 0.08679999999999999\n",
      "trajectorySteps 18\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [2 0 2 0 0]\n",
      " [9 1 1 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0107 -0.0423 -0.0395 -0.0161 -0.0267]\n",
      " [-0.0291 -0.0187 -0.0123 -0.0088 -0.0015]\n",
      " [-0.0099 -0.0357  0.0053 -0.0052 -0.0009]\n",
      " [-0.0344  0.0181  0.043   0.0005 -0.0038]\n",
      " [-0.0157 -0.0062  0.0081  0.     -0.0003]]\n",
      "mean_state_value -0.009705761537747394\n",
      "episode 66/600\n",
      "p1 0.6536 p0 0.08659999999999998\n",
      "trajectorySteps 8\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [4 1 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0107 -0.0422 -0.0394 -0.016  -0.0266]\n",
      " [-0.029  -0.0186 -0.0123 -0.0088 -0.0015]\n",
      " [-0.01   -0.0364  0.006  -0.0052 -0.0009]\n",
      " [-0.0343  0.0182  0.0444  0.0005 -0.0038]\n",
      " [-0.0157 -0.0062  0.0081  0.     -0.0003]]\n",
      "mean_state_value -0.009630595964322673\n",
      "episode 67/600\n",
      "p1 0.6544000000000001 p0 0.08639999999999999\n",
      "trajectorySteps 14\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [7 2 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0106 -0.0422 -0.0393 -0.016  -0.0266]\n",
      " [-0.0289 -0.0186 -0.0123 -0.0088 -0.0015]\n",
      " [-0.01   -0.0372  0.006  -0.0052 -0.0009]\n",
      " [-0.0343  0.0189  0.0458  0.0005 -0.0038]\n",
      " [-0.0157 -0.0062  0.0082  0.     -0.0003]]\n",
      "mean_state_value -0.009553471144015465\n",
      "episode 68/600\n",
      "p1 0.6552 p0 0.08619999999999998\n",
      "trajectorySteps 11\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [2 0 2 0 0]\n",
      " [3 2 1 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0106 -0.0421 -0.0392 -0.016  -0.0265]\n",
      " [-0.0289 -0.0185 -0.0122 -0.0087 -0.0015]\n",
      " [-0.01   -0.0371  0.006  -0.0052 -0.0009]\n",
      " [-0.0342  0.0189  0.0471  0.0005 -0.0038]\n",
      " [-0.0165 -0.007   0.0088  0.     -0.0003]]\n",
      "mean_state_value -0.009509453161738669\n",
      "episode 69/600\n",
      "p1 0.656 p0 0.086\n",
      "trajectorySteps 88\n",
      "[[38  2  0  0  0]\n",
      " [ 7  1  0  0  0]\n",
      " [29  4  1  0  0]\n",
      " [ 4  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0112 -0.042  -0.0391 -0.0159 -0.0265]\n",
      " [-0.0289 -0.0185 -0.0122 -0.0087 -0.0015]\n",
      " [-0.0102 -0.0387  0.0067 -0.0052 -0.0009]\n",
      " [-0.0343  0.019   0.0485  0.0005 -0.0038]\n",
      " [-0.0165 -0.007   0.0089  0.     -0.0003]]\n",
      "mean_state_value -0.009512926746721521\n",
      "episode 70/600\n",
      "p1 0.6568 p0 0.08579999999999999\n",
      "trajectorySteps 141\n",
      "[[47  5  2  0  2]\n",
      " [36  3  1  2 14]\n",
      " [ 8  2  0  2 13]\n",
      " [ 0  0  2  1  1]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.012  -0.0428 -0.0399 -0.0159 -0.0264]\n",
      " [-0.0306 -0.0185 -0.0122 -0.0087 -0.0016]\n",
      " [-0.0104 -0.0387  0.0068 -0.0052 -0.001 ]\n",
      " [-0.0342  0.019   0.0499  0.0012 -0.0046]\n",
      " [-0.0164 -0.007   0.0089  0.     -0.0003]]\n",
      "mean_state_value -0.009624218914321863\n",
      "episode 71/600\n",
      "p1 0.6576 p0 0.08559999999999998\n",
      "trajectorySteps 10\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [4 1 0 0 0]\n",
      " [0 2 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.012  -0.0427 -0.0398 -0.0158 -0.0263]\n",
      " [-0.0306 -0.0184 -0.0122 -0.0087 -0.0016]\n",
      " [-0.0104 -0.0394  0.0068 -0.0051 -0.001 ]\n",
      " [-0.0341  0.0189  0.0513  0.0012 -0.0046]\n",
      " [-0.0164 -0.007   0.0089  0.     -0.0003]]\n",
      "mean_state_value -0.009578398006621178\n",
      "episode 72/600\n",
      "p1 0.6584000000000001 p0 0.08539999999999999\n",
      "trajectorySteps 33\n",
      "[[ 0  0  0  0  0]\n",
      " [ 3  1  0  0  0]\n",
      " [19  3  0  0  0]\n",
      " [ 3  1  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.012  -0.0426 -0.0397 -0.0158 -0.0263]\n",
      " [-0.0305 -0.0184 -0.0121 -0.0087 -0.0016]\n",
      " [-0.0106 -0.0402  0.0068 -0.0051 -0.001 ]\n",
      " [-0.035   0.0196  0.052   0.0012 -0.0046]\n",
      " [-0.0164 -0.007   0.0089  0.     -0.0003]]\n",
      "mean_state_value -0.009568590436047093\n",
      "episode 73/600\n",
      "p1 0.6592 p0 0.08519999999999998\n",
      "trajectorySteps 34\n",
      "[[ 5  0  0  0  0]\n",
      " [ 6  1  0  0  0]\n",
      " [10  4  0  0  0]\n",
      " [ 5  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0121 -0.0425 -0.0396 -0.0158 -0.0262]\n",
      " [-0.0315 -0.0184 -0.0121 -0.0086 -0.0016]\n",
      " [-0.0107 -0.0401  0.0068 -0.0051 -0.001 ]\n",
      " [-0.0357  0.0203  0.0519  0.0012 -0.0046]\n",
      " [-0.0163 -0.0069  0.0089  0.     -0.0003]]\n",
      "mean_state_value -0.00959653444677906\n",
      "episode 74/600\n",
      "p1 0.66 p0 0.08499999999999999\n",
      "trajectorySteps 29\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [13  0  0  0  0]\n",
      " [ 1  0  2  0  0]\n",
      " [ 7  3  2  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.012  -0.0424 -0.0395 -0.0157 -0.0261]\n",
      " [-0.0314 -0.0183 -0.0121 -0.0086 -0.0016]\n",
      " [-0.0108 -0.04    0.0068 -0.0051 -0.001 ]\n",
      " [-0.0357  0.0203  0.0533  0.0012 -0.0046]\n",
      " [-0.018  -0.0069  0.0088  0.     -0.0003]]\n",
      "mean_state_value -0.009587714234963901\n",
      "episode 75/600\n",
      "p1 0.6608 p0 0.08479999999999999\n",
      "trajectorySteps 32\n",
      "[[ 0  0  0  0  0]\n",
      " [ 4  1  0  0  0]\n",
      " [22  0  0  0  0]\n",
      " [ 2  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "🔄⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.012  -0.0423 -0.0394 -0.0157 -0.0261]\n",
      " [-0.0322 -0.0183 -0.012  -0.0086 -0.0016]\n",
      " [-0.011  -0.0399  0.0068 -0.0051 -0.001 ]\n",
      " [-0.0364  0.0211  0.0547  0.0012 -0.0046]\n",
      " [-0.018  -0.0069  0.0088  0.     -0.0003]]\n",
      "mean_state_value -0.009548291595252863\n",
      "episode 76/600\n",
      "p1 0.6616 p0 0.08459999999999998\n",
      "trajectorySteps 34\n",
      "[[ 0  0  0  0  1]\n",
      " [ 0  1  1  1  4]\n",
      " [ 7  1  0  0 10]\n",
      " [ 1  0  2  0  2]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "🔄⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬅️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.012  -0.0422 -0.0394 -0.0157 -0.026 ]\n",
      " [-0.0321 -0.0191 -0.012  -0.0086 -0.0016]\n",
      " [-0.0111 -0.0407  0.0069 -0.0051 -0.001 ]\n",
      " [-0.0363  0.0211  0.0561  0.0012 -0.0046]\n",
      " [-0.0179 -0.0069  0.0095  0.     -0.0003]]\n",
      "mean_state_value -0.009503038041087026\n",
      "episode 77/600\n",
      "p1 0.6624000000000001 p0 0.08439999999999999\n",
      "trajectorySteps 29\n",
      "[[0 0 0 0 0]\n",
      " [1 1 0 0 0]\n",
      " [4 2 0 0 0]\n",
      " [9 0 2 0 0]\n",
      " [8 1 1 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "🔄⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.012  -0.0421 -0.0393 -0.0156 -0.026 ]\n",
      " [-0.0328 -0.0191 -0.012  -0.0086 -0.0016]\n",
      " [-0.0111 -0.0406  0.0069 -0.0051 -0.001 ]\n",
      " [-0.0363  0.0212  0.0575  0.0012 -0.0046]\n",
      " [-0.0189 -0.0069  0.0102  0.     -0.0003]]\n",
      "mean_state_value -0.009463115086732352\n",
      "episode 78/600\n",
      "p1 0.6632 p0 0.08419999999999998\n",
      "trajectorySteps 48\n",
      "[[12  0  0  0  0]\n",
      " [ 7  0  0  0  0]\n",
      " [21  1  0  0  0]\n",
      " [ 4  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0121 -0.042  -0.0392 -0.0156 -0.0259]\n",
      " [-0.0328 -0.019  -0.012  -0.0085 -0.0016]\n",
      " [-0.0112 -0.0405  0.0069 -0.0051 -0.001 ]\n",
      " [-0.0371  0.0219  0.0574  0.0012 -0.0045]\n",
      " [-0.0188 -0.0068  0.0102  0.     -0.0003]]\n",
      "mean_state_value -0.00945329750750955\n",
      "episode 79/600\n",
      "p1 0.664 p0 0.08399999999999999\n",
      "trajectorySteps 132\n",
      "[[32  5  0  0  3]\n",
      " [29  2  2  5  5]\n",
      " [27  2  0  4  4]\n",
      " [ 1  0  2  0  2]\n",
      " [ 0  0  4  2  1]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "➡️🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0125 -0.0427 -0.0391 -0.0156 -0.0259]\n",
      " [-0.0338 -0.0198 -0.0119 -0.0094 -0.0016]\n",
      " [-0.0114 -0.0404  0.0069 -0.005  -0.001 ]\n",
      " [-0.037   0.0219  0.0588  0.0012 -0.0046]\n",
      " [-0.0188 -0.0068  0.0109  0.     -0.0003]]\n",
      "mean_state_value -0.009514022299595085\n",
      "episode 80/600\n",
      "p1 0.6648000000000001 p0 0.08379999999999999\n",
      "trajectorySteps 14\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [4 5 2 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0125 -0.0426 -0.039  -0.0155 -0.0258]\n",
      " [-0.0337 -0.0198 -0.0119 -0.0093 -0.0016]\n",
      " [-0.0114 -0.0412  0.0068 -0.005  -0.001 ]\n",
      " [-0.0369  0.022   0.0596  0.0012 -0.0046]\n",
      " [-0.0187 -0.0068  0.0109  0.     -0.0003]]\n",
      "mean_state_value -0.0094887063394031\n",
      "episode 81/600\n",
      "p1 0.6656 p0 0.08359999999999998\n",
      "trajectorySteps 252\n",
      "[[ 14   4   0   0   0]\n",
      " [ 26   3   0   0   0]\n",
      " [163   7   0   0   0]\n",
      " [ 16   2   2   0   0]\n",
      " [ 15   0   0   0   0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "🔄⏪⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0126 -0.0434 -0.0389 -0.0155 -0.0258]\n",
      " [-0.0355 -0.0198 -0.0119 -0.0093 -0.0016]\n",
      " [-0.0128 -0.0411  0.0068 -0.005  -0.001 ]\n",
      " [-0.0386  0.0227  0.061   0.0012 -0.0046]\n",
      " [-0.019  -0.0068  0.0109  0.     -0.0003]]\n",
      "mean_state_value -0.009628992766061458\n",
      "episode 82/600\n",
      "p1 0.6664000000000001 p0 0.08339999999999999\n",
      "trajectorySteps 9\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [4 1 1 0 0]\n",
      " [1 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "🔄⏪⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0126 -0.0433 -0.0388 -0.0154 -0.0257]\n",
      " [-0.0354 -0.0197 -0.0119 -0.0093 -0.0016]\n",
      " [-0.0128 -0.0418  0.0075 -0.005  -0.001 ]\n",
      " [-0.0385  0.0228  0.0609  0.0012 -0.0046]\n",
      " [-0.0189 -0.0068  0.0109  0.     -0.0003]]\n",
      "mean_state_value -0.009601625148892656\n",
      "episode 83/600\n",
      "p1 0.6672 p0 0.08319999999999998\n",
      "trajectorySteps 17\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [8 5 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "🔄⏪⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0126 -0.0432 -0.0387 -0.0154 -0.0257]\n",
      " [-0.0354 -0.0197 -0.0118 -0.0093 -0.0016]\n",
      " [-0.0129 -0.0426  0.0075 -0.005  -0.001 ]\n",
      " [-0.0385  0.0235  0.0623  0.0012 -0.0046]\n",
      " [-0.0189 -0.0068  0.011   0.     -0.0003]]\n",
      "mean_state_value -0.00952492760968977\n",
      "episode 84/600\n",
      "p1 0.668 p0 0.08299999999999999\n",
      "trajectorySteps 10\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [4 2 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "🔄⏪⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0126 -0.0431 -0.0386 -0.0154 -0.0256]\n",
      " [-0.0353 -0.0196 -0.0118 -0.0092 -0.0015]\n",
      " [-0.0131 -0.0425  0.0076 -0.005  -0.001 ]\n",
      " [-0.0392  0.0242  0.0637  0.0013 -0.0046]\n",
      " [-0.0188 -0.0068  0.011   0.     -0.0002]]\n",
      "mean_state_value -0.009443099510237953\n",
      "episode 85/600\n",
      "p1 0.6688000000000001 p0 0.08279999999999998\n",
      "trajectorySteps 41\n",
      "[[8 3 0 0 0]\n",
      " [6 5 0 0 0]\n",
      " [7 9 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0127 -0.0446 -0.0385 -0.0153 -0.0255]\n",
      " [-0.036  -0.0204 -0.0118 -0.0092 -0.0015]\n",
      " [-0.0132 -0.0441  0.0076 -0.005  -0.001 ]\n",
      " [-0.0391  0.025   0.0645  0.0013 -0.0045]\n",
      " [-0.0188 -0.0067  0.011   0.     -0.0002]]\n",
      "mean_state_value -0.009560214679330771\n",
      "episode 86/600\n",
      "p1 0.6696 p0 0.08259999999999998\n",
      "trajectorySteps 36\n",
      "[[21  0  0  0  0]\n",
      " [ 4  0  0  0  0]\n",
      " [ 4  0  0  0  0]\n",
      " [ 3  1  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0131 -0.0445 -0.0385 -0.0153 -0.0255]\n",
      " [-0.036  -0.0204 -0.0118 -0.0092 -0.0015]\n",
      " [-0.0133 -0.044   0.0076 -0.005  -0.001 ]\n",
      " [-0.0398  0.0257  0.0659  0.0013 -0.0045]\n",
      " [-0.0188 -0.0067  0.011   0.     -0.0002]]\n",
      "mean_state_value -0.009498220339148204\n",
      "episode 87/600\n",
      "p1 0.6704000000000001 p0 0.08239999999999999\n",
      "trajectorySteps 72\n",
      "[[ 6  2  1  1  0]\n",
      " [ 1  0  0  1  1]\n",
      " [ 6  1  0  0  1]\n",
      " [38  1  2  0  1]\n",
      " [ 5  0  1  2  1]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0132 -0.0444 -0.0384 -0.0153 -0.0254]\n",
      " [-0.036  -0.0204 -0.0117 -0.0092 -0.0015]\n",
      " [-0.0133 -0.0439  0.0076 -0.005  -0.001 ]\n",
      " [-0.0408  0.0257  0.0673  0.0013 -0.0045]\n",
      " [-0.0189 -0.0067  0.0118  0.     -0.0002]]\n",
      "mean_state_value -0.009437629682776522\n",
      "episode 88/600\n",
      "p1 0.6712 p0 0.08219999999999998\n",
      "trajectorySteps 34\n",
      "[[8 0 0 0 0]\n",
      " [5 1 0 0 0]\n",
      " [4 1 0 0 0]\n",
      " [7 1 2 0 0]\n",
      " [5 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0132 -0.0443 -0.0383 -0.0152 -0.0254]\n",
      " [-0.0367 -0.0203 -0.0117 -0.0092 -0.0015]\n",
      " [-0.0134 -0.0438  0.0077 -0.0049 -0.001 ]\n",
      " [-0.0417  0.0265  0.0687  0.0013 -0.0045]\n",
      " [-0.019  -0.0067  0.0118  0.     -0.0002]]\n",
      "mean_state_value -0.009402645778184225\n",
      "episode 89/600\n",
      "p1 0.672 p0 0.08199999999999999\n",
      "trajectorySteps 18\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 2 0 0 0]\n",
      " [3 0 2 0 0]\n",
      " [6 1 1 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0132 -0.0442 -0.0382 -0.0152 -0.0253]\n",
      " [-0.0366 -0.0203 -0.0117 -0.0091 -0.0015]\n",
      " [-0.0134 -0.0437  0.0077 -0.0049 -0.001 ]\n",
      " [-0.0417  0.0265  0.0702  0.0013 -0.0045]\n",
      " [-0.0199 -0.0067  0.0125  0.     -0.0002]]\n",
      "mean_state_value -0.00932654543583951\n",
      "episode 90/600\n",
      "p1 0.6728000000000001 p0 0.08179999999999998\n",
      "trajectorySteps 76\n",
      "[[ 3  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [26  3  0  0  0]\n",
      " [26  1  2  0  0]\n",
      " [12  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "➡️🔄⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0132 -0.044  -0.0381 -0.0152 -0.0252]\n",
      " [-0.0365 -0.0202 -0.0116 -0.0091 -0.0015]\n",
      " [-0.0137 -0.0436  0.0077 -0.0049 -0.001 ]\n",
      " [-0.0427  0.0273  0.0701  0.0013 -0.0045]\n",
      " [-0.0201 -0.0066  0.0125  0.     -0.0002]]\n",
      "mean_state_value -0.009326803132791078\n",
      "episode 91/600\n",
      "p1 0.6736 p0 0.08159999999999998\n",
      "trajectorySteps 38\n",
      "[[12  1  3  1  0]\n",
      " [ 1  0  0  1  2]\n",
      " [ 1  0  0  0 12]\n",
      " [ 0  0  2  1  1]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "➡️🔄⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0135 -0.0439 -0.0382 -0.0151 -0.0252]\n",
      " [-0.0365 -0.0202 -0.0116 -0.0091 -0.0016]\n",
      " [-0.0136 -0.0435  0.0077 -0.0049 -0.0011]\n",
      " [-0.0426  0.0273  0.0701  0.002  -0.0053]\n",
      " [-0.0201 -0.0066  0.0126  0.     -0.0002]]\n",
      "mean_state_value -0.009318704793657852\n",
      "episode 92/600\n",
      "p1 0.6744000000000001 p0 0.08139999999999999\n",
      "trajectorySteps 11\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 6 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️🔄⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0134 -0.0438 -0.0381 -0.0151 -0.0251]\n",
      " [-0.0364 -0.0201 -0.0116 -0.0091 -0.0016]\n",
      " [-0.0137 -0.0442  0.0084 -0.0049 -0.0011]\n",
      " [-0.0425  0.0274  0.0716  0.002  -0.0053]\n",
      " [-0.02   -0.0066  0.0126  0.     -0.0002]]\n",
      "mean_state_value -0.009232621579663225\n",
      "episode 93/600\n",
      "p1 0.6752 p0 0.08119999999999998\n",
      "trajectorySteps 229\n",
      "[[132  13  23   2   0]\n",
      " [ 17   3   2   4   6]\n",
      " [  8   7   2   1   5]\n",
      " [  2   0   2   0   0]\n",
      " [  0   0   0   0   0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "🔄⏬⏩️🔄⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0156 -0.0454 -0.0383 -0.0151 -0.025 ]\n",
      " [-0.0365 -0.0209 -0.0124 -0.0099 -0.0017]\n",
      " [-0.0137 -0.0449  0.0092 -0.0057 -0.0012]\n",
      " [-0.0424  0.0274  0.073   0.002  -0.0053]\n",
      " [-0.02   -0.0066  0.0126  0.     -0.0002]]\n",
      "mean_state_value -0.00946016330939624\n",
      "episode 94/600\n",
      "p1 0.676 p0 0.08099999999999999\n",
      "trajectorySteps 33\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 3  1  2  0  0]\n",
      " [24  2  0  0  0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "🔄⏬⏩️🔄⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0156 -0.0453 -0.0382 -0.015  -0.025 ]\n",
      " [-0.0365 -0.0209 -0.0123 -0.0098 -0.0017]\n",
      " [-0.0137 -0.0448  0.0092 -0.0057 -0.0012]\n",
      " [-0.0423  0.0282  0.0738  0.002  -0.0053]\n",
      " [-0.0219 -0.0074  0.0126  0.     -0.0002]]\n",
      "mean_state_value -0.009476614750012387\n",
      "episode 95/600\n",
      "p1 0.6768000000000001 p0 0.08079999999999998\n",
      "trajectorySteps 124\n",
      "[[ 3  0  0  0  0]\n",
      " [37  2  0  0  0]\n",
      " [66  6  0  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [ 1  1  3  1  0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️🔄⬇️\n",
      "➡️⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0156 -0.0452 -0.0381 -0.015  -0.0249]\n",
      " [-0.0382 -0.0208 -0.0123 -0.0098 -0.0017]\n",
      " [-0.0144 -0.0448  0.0092 -0.0057 -0.0012]\n",
      " [-0.0423  0.0283  0.0752  0.002  -0.0052]\n",
      " [-0.0227 -0.0074  0.0134  0.0001 -0.0002]]\n",
      "mean_state_value -0.00949134955979488\n",
      "episode 96/600\n",
      "p1 0.6776 p0 0.08059999999999998\n",
      "trajectorySteps 13\n",
      "[[3 0 0 0 0]\n",
      " [4 0 0 0 0]\n",
      " [2 1 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️🔄⬇️\n",
      "➡️⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0156 -0.0451 -0.038  -0.015  -0.0249]\n",
      " [-0.0381 -0.0208 -0.0123 -0.0098 -0.0017]\n",
      " [-0.0144 -0.0455  0.01   -0.0056 -0.0012]\n",
      " [-0.0422  0.0283  0.0767  0.002  -0.0052]\n",
      " [-0.0226 -0.0073  0.0134  0.0001 -0.0002]]\n",
      "mean_state_value -0.009402472406148521\n",
      "episode 97/600\n",
      "p1 0.6784000000000001 p0 0.08039999999999999\n",
      "trajectorySteps 59\n",
      "[[ 0  0  4  5  2]\n",
      " [ 1  0  2  9  5]\n",
      " [11  9  1  0  3]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  2]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0156 -0.045  -0.038  -0.0149 -0.0248]\n",
      " [-0.0381 -0.0207 -0.0123 -0.0106 -0.0017]\n",
      " [-0.0145 -0.0462  0.0092 -0.0056 -0.0012]\n",
      " [-0.0421  0.0284  0.0781  0.002  -0.0052]\n",
      " [-0.0226 -0.0073  0.0141  0.0001 -0.0003]]\n",
      "mean_state_value -0.009392027016364022\n",
      "episode 98/600\n",
      "p1 0.6792 p0 0.08019999999999998\n",
      "trajectorySteps 39\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [15  2  2  0  0]\n",
      " [18  1  0  0  0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0156 -0.0449 -0.0379 -0.0149 -0.0247]\n",
      " [-0.038  -0.0207 -0.0122 -0.0105 -0.0017]\n",
      " [-0.0145 -0.0461  0.0092 -0.0056 -0.0012]\n",
      " [-0.0429  0.0291  0.0795  0.002  -0.0052]\n",
      " [-0.0237 -0.0081  0.0142  0.0001 -0.0003]]\n",
      "mean_state_value -0.00938313185411002\n",
      "episode 99/600\n",
      "p1 0.68 p0 0.07999999999999999\n",
      "trajectorySteps 140\n",
      "[[ 4  0  0  0  0]\n",
      " [11  4  0  0  0]\n",
      " [96 11  1  0  0]\n",
      " [11  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0157 -0.0448 -0.0378 -0.0149 -0.0247]\n",
      " [-0.0402 -0.0215 -0.0122 -0.0105 -0.0017]\n",
      " [-0.0153 -0.0468  0.01   -0.0056 -0.0012]\n",
      " [-0.0429  0.0292  0.081   0.002  -0.0052]\n",
      " [-0.0236 -0.0081  0.0142  0.0001 -0.0003]]\n",
      "mean_state_value -0.009465914550752585\n",
      "episode 100/600\n",
      "p1 0.6808000000000001 p0 0.07979999999999998\n",
      "trajectorySteps 48\n",
      "[[ 5  0  0  0  0]\n",
      " [18  0  0  0  0]\n",
      " [17  1  0  0  0]\n",
      " [ 4  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0159 -0.0447 -0.0377 -0.0148 -0.0246]\n",
      " [-0.0402 -0.0214 -0.0122 -0.0105 -0.0017]\n",
      " [-0.0154 -0.0467  0.01   -0.0056 -0.0012]\n",
      " [-0.0437  0.0299  0.081   0.0021 -0.0052]\n",
      " [-0.0236 -0.0081  0.0142  0.0001 -0.0003]]\n",
      "mean_state_value -0.009448683422817926\n",
      "episode 101/600\n",
      "p1 0.6816 p0 0.07959999999999998\n",
      "trajectorySteps 80\n",
      "[[34  3  0  0  0]\n",
      " [11  3  0  0  0]\n",
      " [12  6  1  0  0]\n",
      " [ 5  0  2  0  0]\n",
      " [ 3  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0165 -0.0453 -0.0376 -0.0148 -0.0246]\n",
      " [-0.041  -0.0214 -0.0121 -0.0105 -0.0017]\n",
      " [-0.0156 -0.0482  0.0107 -0.0056 -0.0012]\n",
      " [-0.0436  0.03    0.0824  0.0021 -0.0052]\n",
      " [-0.0236 -0.008   0.0142  0.0001 -0.0003]]\n",
      "mean_state_value -0.009487980707909134\n",
      "episode 102/600\n",
      "p1 0.6824000000000001 p0 0.07939999999999998\n",
      "trajectorySteps 23\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  1  0  0  0]\n",
      " [10  9  1  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0164 -0.0452 -0.0376 -0.0148 -0.0245]\n",
      " [-0.0409 -0.0214 -0.0121 -0.0104 -0.0016]\n",
      " [-0.0157 -0.0497  0.0115 -0.0056 -0.0012]\n",
      " [-0.0435  0.0301  0.0839  0.0021 -0.0052]\n",
      " [-0.0236 -0.008   0.0143  0.0001 -0.0003]]\n",
      "mean_state_value -0.00943159130187083\n",
      "episode 103/600\n",
      "p1 0.6832 p0 0.07919999999999998\n",
      "trajectorySteps 13\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [1 0 2 0 0]\n",
      " [2 1 4 2 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0164 -0.0451 -0.0375 -0.0147 -0.0244]\n",
      " [-0.0408 -0.0213 -0.0121 -0.0104 -0.0016]\n",
      " [-0.0157 -0.0496  0.0115 -0.0055 -0.0012]\n",
      " [-0.0434  0.0301  0.0854  0.0021 -0.0051]\n",
      " [-0.0243 -0.008   0.0149 -0.     -0.0003]]\n",
      "mean_state_value -0.009341205898264997\n",
      "episode 104/600\n",
      "p1 0.684 p0 0.07899999999999999\n",
      "trajectorySteps 56\n",
      "[[10  2  0  0  0]\n",
      " [ 4  0  0  0  0]\n",
      " [32  4  1  0  0]\n",
      " [ 1  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0166 -0.045  -0.0374 -0.0147 -0.0244]\n",
      " [-0.0407 -0.0213 -0.012  -0.0104 -0.0016]\n",
      " [-0.0159 -0.0503  0.0123 -0.0055 -0.0011]\n",
      " [-0.0433  0.0302  0.0854  0.0021 -0.0051]\n",
      " [-0.0242 -0.008   0.015  -0.     -0.0003]]\n",
      "mean_state_value -0.009325096081157197\n",
      "episode 105/600\n",
      "p1 0.6848000000000001 p0 0.07879999999999998\n",
      "trajectorySteps 16\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 1 0 0 0]\n",
      " [2 2 2 0 0]\n",
      " [8 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0166 -0.0449 -0.0373 -0.0146 -0.0243]\n",
      " [-0.0406 -0.0212 -0.012  -0.0103 -0.0016]\n",
      " [-0.0159 -0.0509  0.0123 -0.0055 -0.0011]\n",
      " [-0.044   0.0309  0.0869  0.0021 -0.0051]\n",
      " [-0.0244 -0.0079  0.015  -0.     -0.0003]]\n",
      "mean_state_value -0.009268578791599747\n",
      "episode 106/600\n",
      "p1 0.6856 p0 0.07859999999999998\n",
      "trajectorySteps 25\n",
      "[[0 0 0 1 0]\n",
      " [3 3 2 3 0]\n",
      " [4 2 1 1 0]\n",
      " [3 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0166 -0.0448 -0.0372 -0.0146 -0.0242]\n",
      " [-0.0413 -0.022  -0.012  -0.0111 -0.0016]\n",
      " [-0.0159 -0.0523  0.0131 -0.0063 -0.0011]\n",
      " [-0.0439  0.031   0.0883  0.0021 -0.0051]\n",
      " [-0.0244 -0.0079  0.015  -0.     -0.0003]]\n",
      "mean_state_value -0.009332719642660848\n",
      "episode 107/600\n",
      "p1 0.6864000000000001 p0 0.07839999999999998\n",
      "trajectorySteps 19\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [7 0 0 0 0]\n",
      " [7 0 2 0 0]\n",
      " [1 1 1 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0165 -0.0447 -0.0371 -0.0146 -0.0242]\n",
      " [-0.0412 -0.0219 -0.012  -0.0111 -0.0016]\n",
      " [-0.016  -0.0522  0.0131 -0.0063 -0.0011]\n",
      " [-0.044   0.0311  0.0884  0.0021 -0.0051]\n",
      " [-0.0251 -0.0079  0.0158 -0.     -0.0003]]\n",
      "mean_state_value -0.00930083072167909\n",
      "episode 108/600\n",
      "p1 0.6872 p0 0.07819999999999998\n",
      "trajectorySteps 37\n",
      "[[16  1  1  1  1]\n",
      " [ 1  0  0  1  5]\n",
      " [ 1  0  0  0  3]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0169 -0.0446 -0.037  -0.0145 -0.0241]\n",
      " [-0.0411 -0.0219 -0.0119 -0.011  -0.0017]\n",
      " [-0.016  -0.0521  0.0131 -0.0063 -0.0011]\n",
      " [-0.0439  0.0311  0.0898  0.0021 -0.0051]\n",
      " [-0.025  -0.0079  0.0165  0.     -0.0003]]\n",
      "mean_state_value -0.009189395128228466\n",
      "episode 109/600\n",
      "p1 0.6880000000000001 p0 0.07799999999999999\n",
      "trajectorySteps 47\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 5  0  2  0  0]\n",
      " [37  1  1  0  0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0168 -0.0445 -0.0369 -0.0145 -0.0241]\n",
      " [-0.041  -0.0218 -0.0119 -0.011  -0.0017]\n",
      " [-0.016  -0.052   0.0131 -0.0062 -0.0011]\n",
      " [-0.0438  0.0312  0.0913  0.0021 -0.0051]\n",
      " [-0.0263 -0.0078  0.0173  0.     -0.0003]]\n",
      "mean_state_value -0.00911489257594878\n",
      "episode 110/600\n",
      "p1 0.6888000000000001 p0 0.07779999999999998\n",
      "trajectorySteps 132\n",
      "[[ 2  1  1  1  4]\n",
      " [ 1  0  1  9 57]\n",
      " [ 1  0  1  4 45]\n",
      " [ 0  0  2  0  2]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️⬇️🔄\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0169 -0.0444 -0.0368 -0.0145 -0.024 ]\n",
      " [-0.0409 -0.0218 -0.0119 -0.0118 -0.0023]\n",
      " [-0.0159 -0.0518  0.0139 -0.007  -0.0011]\n",
      " [-0.0437  0.0312  0.0921  0.0021 -0.005 ]\n",
      " [-0.0263 -0.0078  0.0173  0.     -0.0003]]\n",
      "mean_state_value -0.00910196881955202\n",
      "episode 111/600\n",
      "p1 0.6896 p0 0.07759999999999997\n",
      "trajectorySteps 33\n",
      "[[ 0  0  0  0  0]\n",
      " [ 4  0  0  0  0]\n",
      " [21  4  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️⬇️🔄\n",
      "🔄🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0168 -0.0442 -0.0368 -0.0144 -0.0239]\n",
      " [-0.0408 -0.0217 -0.0119 -0.0117 -0.0023]\n",
      " [-0.0161 -0.0517  0.0139 -0.007  -0.0011]\n",
      " [-0.0443  0.032   0.093   0.0021 -0.005 ]\n",
      " [-0.0262 -0.0078  0.0174  0.     -0.0003]]\n",
      "mean_state_value -0.009035654298886937\n",
      "episode 112/600\n",
      "p1 0.6904000000000001 p0 0.07739999999999998\n",
      "trajectorySteps 88\n",
      "[[23  0  0  0  0]\n",
      " [27  2  0  0  0]\n",
      " [22  8  0  0  0]\n",
      " [ 3  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️🔄\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0173 -0.0441 -0.0367 -0.0144 -0.0239]\n",
      " [-0.0417 -0.0217 -0.0118 -0.0117 -0.0023]\n",
      " [-0.0162 -0.0532  0.014  -0.007  -0.0011]\n",
      " [-0.0443  0.0328  0.0944  0.0021 -0.005 ]\n",
      " [-0.0261 -0.0078  0.0174  0.     -0.0003]]\n",
      "mean_state_value -0.009032639734076853\n",
      "episode 113/600\n",
      "p1 0.6912 p0 0.07719999999999998\n",
      "trajectorySteps 199\n",
      "[[  0   0   0   0   0]\n",
      " [ 14   0   0   0   0]\n",
      " [159  11   0   0   0]\n",
      " [  9   0   2   0   0]\n",
      " [  1   2   1   0   0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏬⏩️⬇️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0172 -0.044  -0.0366 -0.0144 -0.0238]\n",
      " [-0.0418 -0.0216 -0.0118 -0.0117 -0.0023]\n",
      " [-0.0174 -0.0531  0.014  -0.0069 -0.0011]\n",
      " [-0.0442  0.0328  0.0953  0.0021 -0.005 ]\n",
      " [-0.0268 -0.0085  0.0181  0.     -0.0003]]\n",
      "mean_state_value -0.009055409546520073\n",
      "episode 114/600\n",
      "p1 0.6920000000000001 p0 0.07699999999999999\n",
      "trajectorySteps 47\n",
      "[[19  2  0  0  0]\n",
      " [ 2  2  0  0  0]\n",
      " [ 4  1  0  0  0]\n",
      " [ 5  0  2  0  0]\n",
      " [ 8  1  1  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️⬇️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0174 -0.0447 -0.0365 -0.0143 -0.0238]\n",
      " [-0.0417 -0.0224 -0.0118 -0.0116 -0.0023]\n",
      " [-0.0175 -0.053   0.014  -0.0069 -0.0011]\n",
      " [-0.0441  0.0329  0.0954  0.0021 -0.005 ]\n",
      " [-0.0278 -0.0085  0.0189  0.     -0.0003]]\n",
      "mean_state_value -0.009096079950628997\n",
      "episode 115/600\n",
      "p1 0.6928000000000001 p0 0.07679999999999998\n",
      "trajectorySteps 310\n",
      "[[217  19   1   1   0]\n",
      " [ 34   4   0   1   0]\n",
      " [  6   0   0   2  12]\n",
      " [  7   0   2   0   1]\n",
      " [  0   0   1   1   1]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️🔄\n",
      "⬇️⬅️⏬🔄🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.02   -0.0449 -0.0364 -0.0143 -0.0237]\n",
      " [-0.0442 -0.0231 -0.0117 -0.0116 -0.0023]\n",
      " [-0.0176 -0.0529  0.0141 -0.0069 -0.0013]\n",
      " [-0.0442  0.033   0.0968  0.0021 -0.005 ]\n",
      " [-0.0277 -0.0085  0.0197  0.     -0.0003]]\n",
      "mean_state_value -0.009238430616085407\n",
      "episode 116/600\n",
      "p1 0.6936 p0 0.07659999999999997\n",
      "trajectorySteps 16\n",
      "[[0 0 0 0 0]\n",
      " [0 1 0 0 0]\n",
      " [7 2 0 0 0]\n",
      " [3 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️🔄\n",
      "⬆️⬅️⏬🔄🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.02   -0.0448 -0.0363 -0.0143 -0.0236]\n",
      " [-0.0441 -0.0231 -0.0117 -0.0116 -0.0023]\n",
      " [-0.0178 -0.0535  0.0141 -0.0069 -0.0013]\n",
      " [-0.0448  0.0337  0.0983  0.0021 -0.005 ]\n",
      " [-0.0276 -0.0084  0.0197  0.     -0.0003]]\n",
      "mean_state_value -0.009174638556402175\n",
      "episode 117/600\n",
      "p1 0.6944000000000001 p0 0.07639999999999998\n",
      "trajectorySteps 37\n",
      "[[ 0  0  0  0  0]\n",
      " [14  1  0  0  0]\n",
      " [16  3  1  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️🔄\n",
      "⬇️⬅️⏬🔄🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.02   -0.0447 -0.0362 -0.0142 -0.0236]\n",
      " [-0.045  -0.023  -0.0117 -0.0116 -0.0023]\n",
      " [-0.0178 -0.0541  0.0149 -0.0069 -0.0013]\n",
      " [-0.0447  0.0338  0.0998  0.0021 -0.005 ]\n",
      " [-0.0276 -0.0084  0.0197  0.     -0.0003]]\n",
      "mean_state_value -0.009114519493556315\n",
      "episode 118/600\n",
      "p1 0.6952 p0 0.07619999999999998\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [1 1 1 0 0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️🔄\n",
      "⬇️⬅️⏬🔄🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0199 -0.0446 -0.0361 -0.0142 -0.0235]\n",
      " [-0.0449 -0.023  -0.0116 -0.0115 -0.0023]\n",
      " [-0.0179 -0.054   0.0149 -0.0069 -0.0013]\n",
      " [-0.0453  0.0331  0.0999  0.0021 -0.0049]\n",
      " [-0.0275 -0.0084  0.0205  0.     -0.0003]]\n",
      "mean_state_value -0.009100805020525153\n",
      "episode 119/600\n",
      "p1 0.6960000000000001 p0 0.07599999999999998\n",
      "trajectorySteps 10\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [4 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️🔄\n",
      "⬇️⬅️⏬🔄🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0199 -0.0445 -0.036  -0.0142 -0.0235]\n",
      " [-0.0447 -0.0229 -0.0116 -0.0115 -0.0023]\n",
      " [-0.018  -0.0538  0.0149 -0.0068 -0.0013]\n",
      " [-0.046   0.0339  0.1014  0.0022 -0.0049]\n",
      " [-0.0275 -0.0084  0.0206  0.     -0.0003]]\n",
      "mean_state_value -0.009002758671789584\n",
      "episode 120/600\n",
      "p1 0.6968000000000001 p0 0.07579999999999998\n",
      "trajectorySteps 18\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [4 0 0 0 0]\n",
      " [6 0 2 0 0]\n",
      " [3 1 2 0 0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️🔄\n",
      "⬇️⬅️⏬🔄🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0198 -0.0443 -0.0359 -0.0141 -0.0234]\n",
      " [-0.0446 -0.0229 -0.0116 -0.0115 -0.0023]\n",
      " [-0.0181 -0.0537  0.015  -0.0068 -0.0013]\n",
      " [-0.046   0.034   0.1029  0.0022 -0.0049]\n",
      " [-0.0282 -0.0083  0.0213  0.     -0.0003]]\n",
      "mean_state_value -0.008905857772719251\n",
      "episode 121/600\n",
      "p1 0.6976 p0 0.07559999999999997\n",
      "trajectorySteps 39\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [13  2  0  0  0]\n",
      " [16  1  2  0  0]\n",
      " [ 4  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️🔄\n",
      "➡️⬅️⏬🔄🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0198 -0.0442 -0.0359 -0.0141 -0.0233]\n",
      " [-0.0445 -0.0228 -0.0116 -0.0114 -0.0023]\n",
      " [-0.0182 -0.0536  0.015  -0.0068 -0.0013]\n",
      " [-0.0468  0.0348  0.103   0.0022 -0.0049]\n",
      " [-0.0283 -0.0083  0.0214  0.     -0.0003]]\n",
      "mean_state_value -0.008878977927355139\n",
      "episode 122/600\n",
      "p1 0.6984000000000001 p0 0.07539999999999998\n",
      "trajectorySteps 6\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 1 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️🔄\n",
      "⬆️⬅️⏬🔄🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0198 -0.0441 -0.0358 -0.0141 -0.0233]\n",
      " [-0.0444 -0.0227 -0.0115 -0.0114 -0.0023]\n",
      " [-0.0182 -0.0542  0.015  -0.0068 -0.0013]\n",
      " [-0.0467  0.0356  0.1039  0.0022 -0.0049]\n",
      " [-0.0282 -0.0083  0.0214  0.     -0.0003]]\n",
      "mean_state_value -0.008803221171879459\n",
      "episode 123/600\n",
      "p1 0.6992 p0 0.07519999999999998\n",
      "trajectorySteps 85\n",
      "[[ 1  1  0  0  1]\n",
      " [12  1  1  3 25]\n",
      " [11  0  2  5  0]\n",
      " [12  1  2  0  0]\n",
      " [ 7  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0197 -0.0447 -0.0357 -0.014  -0.0232]\n",
      " [-0.0445 -0.0234 -0.0115 -0.0114 -0.0025]\n",
      " [-0.0182 -0.0541  0.0151 -0.0075 -0.0013]\n",
      " [-0.0474  0.0356  0.1054  0.0022 -0.0049]\n",
      " [-0.0283 -0.0082  0.0214  0.     -0.0003]]\n",
      "mean_state_value -0.008844388495059818\n",
      "episode 124/600\n",
      "p1 0.7000000000000001 p0 0.07499999999999998\n",
      "trajectorySteps 31\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [13  9  0  0  0]\n",
      " [ 2  1  2  0  0]\n",
      " [ 0  1  2  1  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "🔄🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0197 -0.0446 -0.0356 -0.014  -0.0231]\n",
      " [-0.0444 -0.0234 -0.0115 -0.0113 -0.0025]\n",
      " [-0.0184 -0.0547  0.0151 -0.0075 -0.0013]\n",
      " [-0.0473  0.0349  0.1069  0.0022 -0.0049]\n",
      " [-0.0282 -0.0082  0.0222  0.     -0.0003]]\n",
      "mean_state_value -0.008780403654543495\n",
      "episode 125/600\n",
      "p1 0.7008000000000001 p0 0.07479999999999998\n",
      "trajectorySteps 13\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [6 3 1 0 0]\n",
      " [1 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0197 -0.0445 -0.0355 -0.0139 -0.0231]\n",
      " [-0.0443 -0.0233 -0.0114 -0.0113 -0.0025]\n",
      " [-0.0185 -0.0553  0.0159 -0.0075 -0.0013]\n",
      " [-0.0472  0.035   0.1084  0.0022 -0.0049]\n",
      " [-0.0281 -0.0082  0.0223  0.     -0.0003]]\n",
      "mean_state_value -0.008678436383845332\n",
      "episode 126/600\n",
      "p1 0.7016 p0 0.07459999999999997\n",
      "trajectorySteps 76\n",
      "[[ 8 12  0  0  0]\n",
      " [ 3  2  0  0  0]\n",
      " [35  9  2  0  0]\n",
      " [ 3  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0199 -0.0461 -0.0354 -0.0139 -0.023 ]\n",
      " [-0.0442 -0.0233 -0.0114 -0.0113 -0.0025]\n",
      " [-0.0189 -0.056   0.0159 -0.0075 -0.0013]\n",
      " [-0.0471  0.0351  0.1085  0.0022 -0.0048]\n",
      " [-0.0281 -0.0082  0.0223  0.     -0.0003]]\n",
      "mean_state_value -0.008757578990634866\n",
      "episode 127/600\n",
      "p1 0.7024000000000001 p0 0.07439999999999998\n",
      "trajectorySteps 35\n",
      "[[ 6  3  0  0  0]\n",
      " [ 9  2  0  0  0]\n",
      " [11  1  1  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0201 -0.0467 -0.0353 -0.0139 -0.023 ]\n",
      " [-0.0448 -0.0233 -0.0114 -0.0113 -0.0025]\n",
      " [-0.0189 -0.0566  0.0167 -0.0074 -0.0013]\n",
      " [-0.047   0.0351  0.11    0.0022 -0.0048]\n",
      " [-0.028  -0.0081  0.0223  0.     -0.0003]]\n",
      "mean_state_value -0.008725661950194329\n",
      "episode 128/600\n",
      "p1 0.7032 p0 0.07419999999999997\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [1 2 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0201 -0.0466 -0.0352 -0.0138 -0.0229]\n",
      " [-0.0447 -0.0232 -0.0114 -0.0112 -0.0025]\n",
      " [-0.0189 -0.0564  0.0168 -0.0074 -0.0012]\n",
      " [-0.0475  0.0352  0.1102  0.0022 -0.0048]\n",
      " [-0.0279 -0.0081  0.0224  0.     -0.0003]]\n",
      "mean_state_value -0.00870235355919194\n",
      "episode 129/600\n",
      "p1 0.7040000000000001 p0 0.07399999999999998\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 1 1 0 0]\n",
      " [1 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.02   -0.0464 -0.0351 -0.0138 -0.0228]\n",
      " [-0.0446 -0.0232 -0.0113 -0.0112 -0.0025]\n",
      " [-0.0189 -0.057   0.0176 -0.0074 -0.0012]\n",
      " [-0.0474  0.0353  0.1117  0.0022 -0.0048]\n",
      " [-0.0279 -0.0081  0.0224  0.     -0.0003]]\n",
      "mean_state_value -0.008594959684493798\n",
      "episode 130/600\n",
      "p1 0.7048000000000001 p0 0.07379999999999998\n",
      "trajectorySteps 59\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [18  2  0  0  0]\n",
      " [19  5  2  0  0]\n",
      " [11  1  1  0  0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.02   -0.0463 -0.035  -0.0138 -0.0228]\n",
      " [-0.0445 -0.0231 -0.0113 -0.0112 -0.0025]\n",
      " [-0.0191 -0.0569  0.0176 -0.0074 -0.0012]\n",
      " [-0.0495  0.0332  0.1132  0.0022 -0.0048]\n",
      " [-0.0281 -0.0081  0.0232  0.     -0.0003]]\n",
      "mean_state_value -0.008652479307243966\n",
      "episode 131/600\n",
      "p1 0.7056 p0 0.07359999999999997\n",
      "trajectorySteps 22\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [ 3  0  2  0  0]\n",
      " [12  1  1  0  0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.02   -0.0462 -0.0349 -0.0137 -0.0227]\n",
      " [-0.0444 -0.0231 -0.0113 -0.0111 -0.0024]\n",
      " [-0.0191 -0.0567  0.0176 -0.0074 -0.0012]\n",
      " [-0.0494  0.0332  0.1147  0.0022 -0.0048]\n",
      " [-0.0289 -0.008   0.024   0.     -0.0003]]\n",
      "mean_state_value -0.008555098065329548\n",
      "episode 132/600\n",
      "p1 0.7064000000000001 p0 0.07339999999999998\n",
      "trajectorySteps 94\n",
      "[[31  2  0  0  0]\n",
      " [32  3  0  0  0]\n",
      " [14  1  0  0  0]\n",
      " [ 4  0  2  0  0]\n",
      " [ 3  1  1  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0205 -0.0461 -0.0349 -0.0137 -0.0227]\n",
      " [-0.0467 -0.023  -0.0112 -0.0111 -0.0024]\n",
      " [-0.0193 -0.0566  0.0177 -0.0073 -0.0012]\n",
      " [-0.0494  0.0333  0.1163  0.0022 -0.0048]\n",
      " [-0.0297 -0.008   0.0248  0.     -0.0003]]\n",
      "mean_state_value -0.008586234206251632\n",
      "episode 133/600\n",
      "p1 0.7072 p0 0.07319999999999997\n",
      "trajectorySteps 54\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [33  4  1  0  0]\n",
      " [ 7  2  2  0  0]\n",
      " [ 4  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "⬆️🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0205 -0.046  -0.0348 -0.0136 -0.0226]\n",
      " [-0.0466 -0.023  -0.0112 -0.0111 -0.0024]\n",
      " [-0.0196 -0.0572  0.0177 -0.0073 -0.0012]\n",
      " [-0.0501  0.0334  0.1178  0.0022 -0.0047]\n",
      " [-0.0296 -0.008   0.0248  0.     -0.0003]]\n",
      "mean_state_value -0.008551489517414461\n",
      "episode 134/600\n",
      "p1 0.7080000000000001 p0 0.07299999999999998\n",
      "trajectorySteps 40\n",
      "[[ 0  0  0  0  0]\n",
      " [10  1  0  0  0]\n",
      " [14 10  2  1  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0205 -0.0459 -0.0347 -0.0136 -0.0225]\n",
      " [-0.0473 -0.0229 -0.0112 -0.011  -0.0024]\n",
      " [-0.0197 -0.0578  0.0185 -0.008  -0.0012]\n",
      " [-0.05    0.0335  0.1193  0.0022 -0.0047]\n",
      " [-0.0295 -0.0079  0.0249  0.     -0.0003]]\n",
      "mean_state_value -0.008509582064994182\n",
      "episode 135/600\n",
      "p1 0.7088000000000001 p0 0.07279999999999998\n",
      "trajectorySteps 45\n",
      "[[ 0  0  0  0  0]\n",
      " [ 5  2  0  0  0]\n",
      " [19 14  1  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0204 -0.0457 -0.0346 -0.0136 -0.0225]\n",
      " [-0.0479 -0.0229 -0.0111 -0.011  -0.0024]\n",
      " [-0.0198 -0.0592  0.0193 -0.008  -0.0012]\n",
      " [-0.0499  0.0336  0.1208  0.0022 -0.0047]\n",
      " [-0.0295 -0.0079  0.0249  0.     -0.0003]]\n",
      "mean_state_value -0.008468473002407129\n",
      "episode 136/600\n",
      "p1 0.7096 p0 0.07259999999999997\n",
      "trajectorySteps 35\n",
      "[[ 0  0  0  0  0]\n",
      " [ 6  0  0  0  0]\n",
      " [22  1  0  0  0]\n",
      " [ 2  2  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0204 -0.0456 -0.0345 -0.0135 -0.0224]\n",
      " [-0.0479 -0.0229 -0.0111 -0.011  -0.0024]\n",
      " [-0.0201 -0.059   0.0194 -0.008  -0.0012]\n",
      " [-0.0504  0.0337  0.1217  0.0022 -0.0047]\n",
      " [-0.0294 -0.0079  0.0249  0.     -0.0003]]\n",
      "mean_state_value -0.00842786486948827\n",
      "episode 137/600\n",
      "p1 0.7104000000000001 p0 0.07239999999999998\n",
      "trajectorySteps 22\n",
      "[[8 6 0 0 0]\n",
      " [2 1 0 0 0]\n",
      " [1 1 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0206 -0.0462 -0.0344 -0.0135 -0.0223]\n",
      " [-0.0478 -0.0228 -0.0111 -0.011  -0.0024]\n",
      " [-0.0201 -0.0596  0.0194 -0.008  -0.0012]\n",
      " [-0.0503  0.0345  0.1233  0.0022 -0.0047]\n",
      " [-0.0293 -0.0079  0.025   0.     -0.0003]]\n",
      "mean_state_value -0.008360637532363692\n",
      "episode 138/600\n",
      "p1 0.7112 p0 0.07219999999999997\n",
      "trajectorySteps 29\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [12  1  0  0  0]\n",
      " [ 1  0  2  0  0]\n",
      " [ 3  4  5  1  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.060e-02 -4.610e-02 -3.430e-02 -1.350e-02 -2.230e-02]\n",
      " [-4.770e-02 -2.280e-02 -1.110e-02 -1.090e-02 -2.400e-03]\n",
      " [-2.010e-02 -5.940e-02  1.950e-02 -7.900e-03 -1.200e-03]\n",
      " [-5.020e-02  3.460e-02  1.248e-01  2.300e-03 -4.700e-03]\n",
      " [-3.000e-02 -7.900e-03  2.430e-02  1.000e-04 -3.000e-04]]\n",
      "mean_state_value -0.00831043077638955\n",
      "episode 139/600\n",
      "p1 0.7120000000000001 p0 0.07199999999999998\n",
      "trajectorySteps 148\n",
      "[[  0   0   0   0   0]\n",
      " [  3   1   0   0   0]\n",
      " [113  12   1   0   0]\n",
      " [ 16   0   2   0   0]\n",
      " [  0   0   0   0   0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬆️⏬⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.050e-02 -4.600e-02 -3.420e-02 -1.340e-02 -2.220e-02]\n",
      " [-4.760e-02 -2.270e-02 -1.100e-02 -1.090e-02 -2.400e-03]\n",
      " [-2.080e-02 -6.080e-02  2.030e-02 -7.900e-03 -1.200e-03]\n",
      " [-5.040e-02  3.470e-02  1.264e-01  2.300e-03 -4.700e-03]\n",
      " [-2.990e-02 -7.800e-03  2.440e-02  1.000e-04 -3.000e-04]]\n",
      "mean_state_value -0.008275514719628863\n",
      "episode 140/600\n",
      "p1 0.7128000000000001 p0 0.07179999999999997\n",
      "trajectorySteps 102\n",
      "[[22 19  3  1  3]\n",
      " [ 5  1  0  1  8]\n",
      " [ 4  2  0  3 27]\n",
      " [ 0  0  2  1  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.110e-02 -4.680e-02 -3.420e-02 -1.340e-02 -2.220e-02]\n",
      " [-4.750e-02 -2.270e-02 -1.100e-02 -1.090e-02 -2.500e-03]\n",
      " [-2.090e-02 -6.070e-02  2.030e-02 -8.600e-03 -1.300e-03]\n",
      " [-5.030e-02  3.480e-02  1.273e-01  3.100e-03 -4.700e-03]\n",
      " [-2.980e-02 -7.800e-03  2.440e-02  1.000e-04 -3.000e-04]]\n",
      "mean_state_value -0.008266921393327463\n",
      "episode 141/600\n",
      "p1 0.7136 p0 0.07159999999999997\n",
      "trajectorySteps 79\n",
      "[[ 2  0  0  0  0]\n",
      " [10  1  0  0  0]\n",
      " [ 9  0  0  0  0]\n",
      " [ 6  0  2  0  0]\n",
      " [47  1  1  0  0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.110e-02 -4.670e-02 -3.410e-02 -1.340e-02 -2.210e-02]\n",
      " [-4.810e-02 -2.270e-02 -1.100e-02 -1.080e-02 -2.500e-03]\n",
      " [-2.120e-02 -6.050e-02  2.040e-02 -8.600e-03 -1.300e-03]\n",
      " [-5.030e-02  3.480e-02  1.288e-01  3.100e-03 -4.600e-03]\n",
      " [-3.110e-02 -7.800e-03  2.520e-02  1.000e-04 -3.000e-04]]\n",
      "mean_state_value -0.008229021919345974\n",
      "episode 142/600\n",
      "p1 0.7144000000000001 p0 0.07139999999999998\n",
      "trajectorySteps 67\n",
      "[[18  1  1  1  2]\n",
      " [20  2  0  0  1]\n",
      " [ 5  1  0  1  5]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  0  2  2  1]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.140e-02 -4.660e-02 -3.400e-02 -1.330e-02 -2.210e-02]\n",
      " [-4.940e-02 -2.260e-02 -1.090e-02 -1.080e-02 -2.500e-03]\n",
      " [-2.120e-02 -6.040e-02  2.040e-02 -8.600e-03 -1.300e-03]\n",
      " [-5.020e-02  3.490e-02  1.304e-01  3.100e-03 -4.600e-03]\n",
      " [-3.100e-02 -7.800e-03  2.610e-02  1.000e-04 -3.000e-04]]\n",
      "mean_state_value -0.00816123561935707\n",
      "episode 143/600\n",
      "p1 0.7152000000000001 p0 0.07119999999999997\n",
      "trajectorySteps 60\n",
      "[[22 19  0  0  0]\n",
      " [ 2  1  2  1  1]\n",
      " [ 2  0  0  0  1]\n",
      " [ 0  0  2  1  2]\n",
      " [ 0  0  0  1  3]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0217 -0.0474 -0.0339 -0.0133 -0.0221]\n",
      " [-0.0493 -0.0233 -0.0116 -0.0108 -0.0024]\n",
      " [-0.0212 -0.0602  0.0205 -0.0085 -0.0013]\n",
      " [-0.05    0.035   0.1305  0.0039 -0.0046]\n",
      " [-0.031  -0.0077  0.0261 -0.0006 -0.0004]]\n",
      "mean_state_value -0.008211999744337638\n",
      "episode 144/600\n",
      "p1 0.7160000000000001 p0 0.07099999999999998\n",
      "trajectorySteps 129\n",
      "[[14  1  0  0  0]\n",
      " [33  7  0  0  0]\n",
      " [39  2  0  0  0]\n",
      " [16  0  2  0  0]\n",
      " [13  1  1  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0219 -0.0473 -0.0338 -0.0133 -0.022 ]\n",
      " [-0.0536 -0.0239 -0.0116 -0.0108 -0.0024]\n",
      " [-0.0217 -0.0601  0.0205 -0.0085 -0.0013]\n",
      " [-0.0501  0.0351  0.1308  0.0039 -0.0046]\n",
      " [-0.032  -0.0077  0.0269 -0.0006 -0.0004]]\n",
      "mean_state_value -0.008413507192670859\n",
      "episode 145/600\n",
      "p1 0.7168000000000001 p0 0.07079999999999997\n",
      "trajectorySteps 22\n",
      "[[0 0 0 0 0]\n",
      " [0 1 0 0 0]\n",
      " [9 9 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0218 -0.0471 -0.0338 -0.0132 -0.0219]\n",
      " [-0.0534 -0.0239 -0.0116 -0.0107 -0.0024]\n",
      " [-0.0217 -0.0614  0.0205 -0.0085 -0.0013]\n",
      " [-0.05    0.0359  0.1323  0.0039 -0.0046]\n",
      " [-0.032  -0.0077  0.027  -0.0006 -0.0004]]\n",
      "mean_state_value -0.008330789714867606\n",
      "episode 146/600\n",
      "p1 0.7176 p0 0.07059999999999997\n",
      "trajectorySteps 21\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [9 2 1 0 0]\n",
      " [7 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0218 -0.047  -0.0337 -0.0132 -0.0219]\n",
      " [-0.0533 -0.0239 -0.0115 -0.0107 -0.0024]\n",
      " [-0.0218 -0.0619  0.0214 -0.0085 -0.0013]\n",
      " [-0.0499  0.036   0.1339  0.0039 -0.0046]\n",
      " [-0.0319 -0.0076  0.027  -0.0006 -0.0004]]\n",
      "mean_state_value -0.008220075186931918\n",
      "episode 147/600\n",
      "p1 0.7184000000000001 p0 0.07039999999999998\n",
      "trajectorySteps 160\n",
      "[[121   9   0   0   0]\n",
      " [ 15   2   0   0   0]\n",
      " [  5   2   0   0   0]\n",
      " [  2   1   2   0   0]\n",
      " [  1   0   0   0   0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0233 -0.047  -0.0336 -0.0131 -0.0218]\n",
      " [-0.0546 -0.0239 -0.0115 -0.0107 -0.0024]\n",
      " [-0.0218 -0.0624  0.0214 -0.0084 -0.0013]\n",
      " [-0.0498  0.0369  0.1341  0.0039 -0.0046]\n",
      " [-0.0318 -0.0076  0.0271 -0.0006 -0.0004]]\n",
      "mean_state_value -0.008292118423001009\n",
      "episode 148/600\n",
      "p1 0.7192000000000001 p0 0.07019999999999997\n",
      "trajectorySteps 24\n",
      "[[0 0 0 0 0]\n",
      " [2 1 0 0 0]\n",
      " [9 2 0 0 0]\n",
      " [5 0 2 0 0]\n",
      " [1 1 1 0 0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0233 -0.0469 -0.0335 -0.0131 -0.0217]\n",
      " [-0.0552 -0.0238 -0.0115 -0.0106 -0.0024]\n",
      " [-0.0218 -0.0623  0.0215 -0.0084 -0.0013]\n",
      " [-0.0497  0.037   0.1357  0.0039 -0.0046]\n",
      " [-0.0324 -0.0076  0.0279 -0.0006 -0.0004]]\n",
      "mean_state_value -0.008203222643000635\n",
      "episode 149/600\n",
      "p1 0.7200000000000001 p0 0.06999999999999998\n",
      "trajectorySteps 37\n",
      "[[ 1  2  1  1  0]\n",
      " [ 0  1  0  1  1]\n",
      " [ 5  5  0  0  3]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  2 10]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0232 -0.0468 -0.0334 -0.0131 -0.0217]\n",
      " [-0.055  -0.0238 -0.0114 -0.0106 -0.0024]\n",
      " [-0.0218 -0.0628  0.0215 -0.0084 -0.0013]\n",
      " [-0.0496  0.0371  0.1359  0.0039 -0.0045]\n",
      " [-0.0323 -0.0076  0.0287 -0.0006 -0.0004]]\n",
      "mean_state_value -0.008142208486404184\n",
      "episode 150/600\n",
      "p1 0.7208000000000001 p0 0.06979999999999997\n",
      "trajectorySteps 76\n",
      "[[12  9  1  1  1]\n",
      " [ 3  1  0  1  1]\n",
      " [23  3  0  1  7]\n",
      " [ 3  0  2  0  1]\n",
      " [ 0  1  2  2  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0237 -0.0467 -0.0333 -0.013  -0.0216]\n",
      " [-0.0555 -0.0237 -0.0114 -0.0106 -0.0024]\n",
      " [-0.0219 -0.0627  0.0215 -0.0084 -0.0014]\n",
      " [-0.0495  0.0371  0.1375  0.0039 -0.0045]\n",
      " [-0.0323 -0.0075  0.0289 -0.0006 -0.0004]]\n",
      "mean_state_value -0.008084728574091232\n",
      "episode 151/600\n",
      "p1 0.7216 p0 0.06959999999999997\n",
      "trajectorySteps 43\n",
      "[[ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [28  3  0  0  0]\n",
      " [ 1  2  2  0  0]\n",
      " [ 0  4  1  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0236 -0.0466 -0.0332 -0.013  -0.0216]\n",
      " [-0.0554 -0.0237 -0.0114 -0.0105 -0.0024]\n",
      " [-0.0221 -0.0632  0.0216 -0.0083 -0.0014]\n",
      " [-0.0493  0.0373  0.139   0.0039 -0.0045]\n",
      " [-0.0322 -0.0083  0.0282 -0.0006 -0.0004]]\n",
      "mean_state_value -0.008062177788749195\n",
      "episode 152/600\n",
      "p1 0.7224000000000002 p0 0.06939999999999998\n",
      "trajectorySteps 25\n",
      "[[ 0  0  0  0  0]\n",
      " [12  1  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [ 4  1  2  0  0]\n",
      " [ 2  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0236 -0.0465 -0.0331 -0.013  -0.0215]\n",
      " [-0.0559 -0.0236 -0.0113 -0.0105 -0.0024]\n",
      " [-0.0221 -0.063   0.0216 -0.0083 -0.0014]\n",
      " [-0.0499  0.0382  0.1406  0.004  -0.0045]\n",
      " [-0.0322 -0.0083  0.0283 -0.0006 -0.0003]]\n",
      "mean_state_value -0.00797665027992033\n",
      "episode 153/600\n",
      "p1 0.7232000000000001 p0 0.06919999999999997\n",
      "trajectorySteps 32\n",
      "[[ 1  1  1  0  0]\n",
      " [ 1  0  1  1  4]\n",
      " [10  0  0  0  6]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0235 -0.0464 -0.0337 -0.0129 -0.0214]\n",
      " [-0.0558 -0.0236 -0.0113 -0.0105 -0.0025]\n",
      " [-0.0222 -0.0629  0.0217 -0.0083 -0.0014]\n",
      " [-0.0498  0.0383  0.1409  0.004  -0.0045]\n",
      " [-0.0321 -0.0083  0.0291 -0.0005 -0.0003]]\n",
      "mean_state_value -0.00791903912282347\n",
      "episode 154/600\n",
      "p1 0.7240000000000001 p0 0.06899999999999998\n",
      "trajectorySteps 5\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 1 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0235 -0.0463 -0.0336 -0.0129 -0.0214]\n",
      " [-0.0556 -0.0235 -0.0113 -0.0105 -0.0024]\n",
      " [-0.0221 -0.0634  0.0225 -0.0083 -0.0014]\n",
      " [-0.0496  0.0383  0.1424  0.004  -0.0045]\n",
      " [-0.032  -0.0082  0.0292 -0.0005 -0.0003]]\n",
      "mean_state_value -0.00779708294176323\n",
      "episode 155/600\n",
      "p1 0.7248000000000001 p0 0.06879999999999997\n",
      "trajectorySteps 8\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [1 0 2 0 0]\n",
      " [1 1 1 0 0]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0234 -0.0461 -0.0335 -0.0129 -0.0213]\n",
      " [-0.0555 -0.0234 -0.0112 -0.0104 -0.0024]\n",
      " [-0.0222 -0.0632  0.0226 -0.0082 -0.0014]\n",
      " [-0.0495  0.0384  0.144   0.004  -0.0045]\n",
      " [-0.0326 -0.0082  0.03   -0.0005 -0.0003]]\n",
      "mean_state_value -0.007677776618134771\n",
      "episode 156/600\n",
      "p1 0.7256 p0 0.06859999999999997\n",
      "trajectorySteps 104\n",
      "[[ 0  0  0  0  0]\n",
      " [ 4  2  2  1  2]\n",
      " [34  9  2  2  9]\n",
      " [31  1  2  0  0]\n",
      " [ 3  0  0  0  0]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0234 -0.046  -0.0334 -0.0128 -0.0213]\n",
      " [-0.0566 -0.0241 -0.0119 -0.0104 -0.0024]\n",
      " [-0.0224 -0.0638  0.0227 -0.0089 -0.0014]\n",
      " [-0.0496  0.0385  0.1456  0.004  -0.0045]\n",
      " [-0.0326 -0.0082  0.0301 -0.0005 -0.0003]]\n",
      "mean_state_value -0.007744883731548806\n",
      "episode 157/600\n",
      "p1 0.7264000000000002 p0 0.06839999999999997\n",
      "trajectorySteps 45\n",
      "[[ 1  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [14  2  0  0  0]\n",
      " [10  0  2  1  0]\n",
      " [ 9  1  1  2  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0234 -0.0459 -0.0333 -0.0128 -0.0212]\n",
      " [-0.0565 -0.024  -0.0119 -0.0104 -0.0024]\n",
      " [-0.0225 -0.0636  0.0228 -0.0089 -0.0014]\n",
      " [-0.0496  0.0386  0.1472  0.0048 -0.0044]\n",
      " [-0.0333 -0.0081  0.0301 -0.0012 -0.0003]]\n",
      "mean_state_value -0.007665396669906987\n",
      "episode 158/600\n",
      "p1 0.7272000000000001 p0 0.06819999999999997\n",
      "trajectorySteps 10\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [5 2 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0233 -0.0457 -0.0332 -0.0127 -0.0211]\n",
      " [-0.0563 -0.0239 -0.0118 -0.0103 -0.0024]\n",
      " [-0.0225 -0.0641  0.0228 -0.0088 -0.0014]\n",
      " [-0.0495  0.0395  0.1475  0.0048 -0.0044]\n",
      " [-0.0332 -0.0081  0.0302 -0.0012 -0.0003]]\n",
      "mean_state_value -0.007597444946525776\n",
      "episode 159/600\n",
      "p1 0.7280000000000001 p0 0.06799999999999998\n",
      "trajectorySteps 31\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 5  1  0  0  0]\n",
      " [ 4  0  2  0  0]\n",
      " [17  1  1  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0233 -0.0456 -0.0332 -0.0127 -0.0211]\n",
      " [-0.0562 -0.0239 -0.0118 -0.0103 -0.0024]\n",
      " [-0.0225 -0.064   0.0229 -0.0088 -0.0014]\n",
      " [-0.0494  0.0396  0.1491  0.0048 -0.0044]\n",
      " [-0.0342 -0.008   0.031  -0.0012 -0.0003]]\n",
      "mean_state_value -0.007492517055034719\n",
      "episode 160/600\n",
      "p1 0.7288000000000001 p0 0.06779999999999997\n",
      "trajectorySteps 26\n",
      "[[4 3 1 1 0]\n",
      " [2 1 0 1 2]\n",
      " [1 0 0 1 6]\n",
      " [0 0 2 1 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0233 -0.0462 -0.0331 -0.0127 -0.021 ]\n",
      " [-0.056  -0.0238 -0.0118 -0.0103 -0.0024]\n",
      " [-0.0225 -0.0638  0.0229 -0.0095 -0.0014]\n",
      " [-0.0493  0.0397  0.1507  0.0057 -0.0044]\n",
      " [-0.0341 -0.008   0.0311 -0.0012 -0.0003]]\n",
      "mean_state_value -0.007401528495998192\n",
      "episode 161/600\n",
      "p1 0.7296 p0 0.06759999999999997\n",
      "trajectorySteps 20\n",
      "[[ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [10  1  0  0  0]\n",
      " [ 2  1  2  0  0]\n",
      " [ 2  0  0  0  0]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0233 -0.046  -0.033  -0.0126 -0.0209]\n",
      " [-0.0559 -0.0238 -0.0117 -0.0102 -0.0024]\n",
      " [-0.0225 -0.0636  0.023  -0.0094 -0.0014]\n",
      " [-0.0498  0.0405  0.1523  0.0057 -0.0044]\n",
      " [-0.0341 -0.008   0.0311 -0.0012 -0.0003]]\n",
      "mean_state_value -0.007281528079242281\n",
      "episode 162/600\n",
      "p1 0.7304000000000002 p0 0.06739999999999997\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [5 0 2 0 0]\n",
      " [2 1 1 0 0]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0232 -0.0459 -0.0329 -0.0126 -0.0209]\n",
      " [-0.0557 -0.0237 -0.0117 -0.0102 -0.0024]\n",
      " [-0.0225 -0.0634  0.023  -0.0094 -0.0014]\n",
      " [-0.0498  0.0406  0.1539  0.0057 -0.0044]\n",
      " [-0.0346 -0.0079  0.032  -0.0012 -0.0003]]\n",
      "mean_state_value -0.007161484336086477\n",
      "episode 163/600\n",
      "p1 0.7312000000000001 p0 0.06719999999999997\n",
      "trajectorySteps 26\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [19  2  0  0  0]\n",
      " [ 2  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0232 -0.0458 -0.0328 -0.0126 -0.0208]\n",
      " [-0.0556 -0.0236 -0.0117 -0.0102 -0.0024]\n",
      " [-0.0227 -0.0639  0.0231 -0.0094 -0.0014]\n",
      " [-0.0497  0.0415  0.1555  0.0057 -0.0044]\n",
      " [-0.0345 -0.0079  0.032  -0.0012 -0.0003]]\n",
      "mean_state_value -0.007049298316524319\n",
      "episode 164/600\n",
      "p1 0.7320000000000001 p0 0.06699999999999998\n",
      "trajectorySteps 44\n",
      "[[9 1 1 1 0]\n",
      " [8 0 0 1 2]\n",
      " [1 0 0 0 8]\n",
      " [0 0 2 0 7]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0232 -0.0457 -0.0327 -0.0125 -0.0208]\n",
      " [-0.0555 -0.0236 -0.0116 -0.0102 -0.0024]\n",
      " [-0.0227 -0.0637  0.0231 -0.0094 -0.0014]\n",
      " [-0.0496  0.0416  0.1571  0.0057 -0.0044]\n",
      " [-0.0344 -0.0079  0.0329 -0.0011 -0.0003]]\n",
      "mean_state_value -0.00690579422241394\n",
      "episode 165/600\n",
      "p1 0.7328000000000001 p0 0.06679999999999997\n",
      "trajectorySteps 53\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [24  0  2  0  0]\n",
      " [24  1  1  0  0]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️🔄\n",
      "[[-0.0232 -0.0455 -0.0326 -0.0125 -0.0207]\n",
      " [-0.0553 -0.0235 -0.0116 -0.0101 -0.0024]\n",
      " [-0.0227 -0.0636  0.0231 -0.0093 -0.0014]\n",
      " [-0.0497  0.0417  0.1587  0.0057 -0.0043]\n",
      " [-0.0352 -0.0078  0.0338 -0.0011 -0.0003]]\n",
      "mean_state_value -0.006798027035776335\n",
      "episode 166/600\n",
      "p1 0.7336 p0 0.06659999999999996\n",
      "trajectorySteps 41\n",
      "[[ 0  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [27  5  0  0  0]\n",
      " [ 2  2  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️🔄\n",
      "[[-0.0231 -0.0454 -0.0325 -0.0125 -0.0206]\n",
      " [-0.0552 -0.0234 -0.0116 -0.0101 -0.0024]\n",
      " [-0.0229 -0.0647  0.0232 -0.0093 -0.0014]\n",
      " [-0.0496  0.0426  0.1603  0.0057 -0.0043]\n",
      " [-0.0351 -0.0078  0.0338 -0.0011 -0.0003]]\n",
      "mean_state_value -0.006711697840611747\n",
      "episode 167/600\n",
      "p1 0.7344000000000002 p0 0.06639999999999997\n",
      "trajectorySteps 41\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [30  6  1  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️🔄\n",
      "[[-0.0231 -0.0453 -0.0324 -0.0124 -0.0206]\n",
      " [-0.055  -0.0234 -0.0115 -0.0101 -0.0024]\n",
      " [-0.0232 -0.0659  0.0232 -0.0093 -0.0014]\n",
      " [-0.0494  0.0435  0.1619  0.0057 -0.0043]\n",
      " [-0.0351 -0.0078  0.0339 -0.0011 -0.0003]]\n",
      "mean_state_value -0.0066280508057284014\n",
      "episode 168/600\n",
      "p1 0.7352000000000001 p0 0.06619999999999997\n",
      "trajectorySteps 67\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 8  0  0  0  0]\n",
      " [25  2  2  0  0]\n",
      " [29  0  0  0  0]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬇️⏬⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️🔄\n",
      "[[-0.023  -0.0451 -0.0323 -0.0124 -0.0205]\n",
      " [-0.0549 -0.0233 -0.0115 -0.01   -0.0024]\n",
      " [-0.0232 -0.0657  0.0233 -0.0093 -0.0014]\n",
      " [-0.0507  0.0444  0.1635  0.0057 -0.0043]\n",
      " [-0.0357 -0.0078  0.0339 -0.0011 -0.0003]]\n",
      "mean_state_value -0.00656237884186617\n",
      "episode 169/600\n",
      "p1 0.7360000000000001 p0 0.06599999999999998\n",
      "trajectorySteps 58\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  2  0  0  0]\n",
      " [19  4  0  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [26  1  2  0  0]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "⬆️🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.023  -0.045  -0.0322 -0.0124 -0.0204]\n",
      " [-0.0547 -0.0233 -0.0115 -0.01   -0.0023]\n",
      " [-0.0234 -0.0668  0.0233 -0.0092 -0.0014]\n",
      " [-0.0505  0.0445  0.1639  0.0058 -0.0043]\n",
      " [-0.0367 -0.0077  0.0348 -0.0011 -0.0003]]\n",
      "mean_state_value -0.006565117789421253\n",
      "episode 170/600\n",
      "p1 0.7368000000000001 p0 0.06579999999999997\n",
      "trajectorySteps 5\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 1 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬇️⏪⏩️⬇️⬇️\n",
      "⬆️🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.023  -0.0449 -0.0321 -0.0123 -0.0204]\n",
      " [-0.0546 -0.0232 -0.0114 -0.01   -0.0023]\n",
      " [-0.0233 -0.0672  0.0234 -0.0092 -0.0014]\n",
      " [-0.0504  0.0454  0.1642  0.0058 -0.0043]\n",
      " [-0.0366 -0.0077  0.0349 -0.0011 -0.0003]]\n",
      "mean_state_value -0.006489494297426872\n",
      "episode 171/600\n",
      "p1 0.7376 p0 0.06559999999999996\n",
      "trajectorySteps 63\n",
      "[[ 0  0  0  0  0]\n",
      " [22  2  0  0  0]\n",
      " [24  4  0  0  0]\n",
      " [ 5  3  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0229 -0.0447 -0.032  -0.0123 -0.0203]\n",
      " [-0.0558 -0.0232 -0.0114 -0.0099 -0.0023]\n",
      " [-0.0236 -0.0683  0.0234 -0.0092 -0.0014]\n",
      " [-0.0509  0.0462  0.1658  0.0058 -0.0043]\n",
      " [-0.0365 -0.0077  0.0349 -0.0011 -0.0003]]\n",
      "mean_state_value -0.006482432365018514\n",
      "episode 172/600\n",
      "p1 0.7384000000000002 p0 0.06539999999999997\n",
      "trajectorySteps 14\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [10  0  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0229 -0.0446 -0.032  -0.0123 -0.0203]\n",
      " [-0.0556 -0.0231 -0.0114 -0.0099 -0.0023]\n",
      " [-0.0236 -0.0681  0.0235 -0.0091 -0.0014]\n",
      " [-0.0514  0.0471  0.1674  0.0058 -0.0042]\n",
      " [-0.0364 -0.0076  0.035  -0.0011 -0.0003]]\n",
      "mean_state_value -0.006355771027308161\n",
      "episode 173/600\n",
      "p1 0.7392000000000001 p0 0.06519999999999997\n",
      "trajectorySteps 60\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [51  0  0  0  0]\n",
      " [ 4  1  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0228 -0.0445 -0.0319 -0.0122 -0.0202]\n",
      " [-0.0555 -0.0231 -0.0113 -0.0099 -0.0023]\n",
      " [-0.0241 -0.0679  0.0235 -0.0091 -0.0014]\n",
      " [-0.0519  0.048   0.1678  0.0058 -0.0042]\n",
      " [-0.0364 -0.0076  0.0351 -0.0011 -0.0003]]\n",
      "mean_state_value -0.00630196901672637\n",
      "episode 174/600\n",
      "p1 0.7400000000000001 p0 0.06499999999999997\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 0]\n",
      " [1 0 2 1 0]\n",
      " [0 0 1 1 0]]\n",
      "➡️⬅️⬅️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0228 -0.0444 -0.0318 -0.0122 -0.0201]\n",
      " [-0.0553 -0.023  -0.0113 -0.0099 -0.0023]\n",
      " [-0.0241 -0.0677  0.0236 -0.0097 -0.0014]\n",
      " [-0.0517  0.0481  0.1694  0.0058 -0.0042]\n",
      " [-0.0363 -0.0076  0.0359 -0.0011 -0.0003]]\n",
      "mean_state_value -0.006174123415371799\n",
      "episode 175/600\n",
      "p1 0.7408000000000001 p0 0.06479999999999997\n",
      "trajectorySteps 160\n",
      "[[58 49  4  0  0]\n",
      " [12  3  1  1  1]\n",
      " [ 8  8  1  0  1]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  0  1  1  7]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0235 -0.0465 -0.0323 -0.0121 -0.0201]\n",
      " [-0.0554 -0.023  -0.0113 -0.0098 -0.0023]\n",
      " [-0.0242 -0.0682  0.0236 -0.0097 -0.0014]\n",
      " [-0.0516  0.0482  0.1697  0.0058 -0.0042]\n",
      " [-0.0362 -0.0076  0.0368 -0.001  -0.0004]]\n",
      "mean_state_value -0.006261258270164804\n",
      "episode 176/600\n",
      "p1 0.7416 p0 0.06459999999999996\n",
      "trajectorySteps 21\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [11  0  0  0  0]\n",
      " [ 7  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0234 -0.0463 -0.0322 -0.0121 -0.02  ]\n",
      " [-0.0552 -0.0229 -0.0112 -0.0098 -0.0023]\n",
      " [-0.0244 -0.068   0.0237 -0.0097 -0.0014]\n",
      " [-0.0521  0.0491  0.1701  0.0058 -0.0042]\n",
      " [-0.0361 -0.0075  0.0369 -0.001  -0.0004]]\n",
      "mean_state_value -0.006194259355140893\n",
      "episode 177/600\n",
      "p1 0.7424000000000002 p0 0.06439999999999997\n",
      "trajectorySteps 45\n",
      "[[ 5  1  1  1  0]\n",
      " [ 1  0  0  1  4]\n",
      " [ 5  1  0  1 17]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️🔄\n",
      "[[-0.0235 -0.0462 -0.0321 -0.0121 -0.02  ]\n",
      " [-0.0551 -0.0228 -0.0112 -0.0098 -0.0023]\n",
      " [-0.0243 -0.0678  0.0237 -0.0096 -0.0014]\n",
      " [-0.052   0.0492  0.1711  0.0058 -0.0042]\n",
      " [-0.036  -0.0075  0.0378 -0.001  -0.0004]]\n",
      "mean_state_value -0.006071639664979035\n",
      "episode 178/600\n",
      "p1 0.7432000000000001 p0 0.06419999999999997\n",
      "trajectorySteps 150\n",
      "[[31  2  1  1  0]\n",
      " [ 6  1  0  3  2]\n",
      " [ 9  2  0  3 17]\n",
      " [ 0  0  2  0 12]\n",
      " [ 0  0  1  1 56]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0241 -0.0467 -0.0321 -0.0121 -0.0199]\n",
      " [-0.055  -0.0228 -0.0112 -0.0097 -0.0023]\n",
      " [-0.0244 -0.0677  0.0238 -0.0096 -0.0015]\n",
      " [-0.0518  0.0493  0.1727  0.0058 -0.0043]\n",
      " [-0.0359 -0.0075  0.0387 -0.001  -0.0008]]\n",
      "mean_state_value -0.006002008873077621\n",
      "episode 179/600\n",
      "p1 0.7440000000000001 p0 0.06399999999999997\n",
      "trajectorySteps 40\n",
      "[[ 0  0  0  0  0]\n",
      " [ 5  0  0  0  0]\n",
      " [29  1  0  0  0]\n",
      " [ 2  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "⬇️🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0241 -0.0466 -0.032  -0.012  -0.0198]\n",
      " [-0.0548 -0.0228 -0.0111 -0.0097 -0.0023]\n",
      " [-0.0247 -0.0681  0.0238 -0.0096 -0.0015]\n",
      " [-0.0518  0.0503  0.1744  0.0058 -0.0043]\n",
      " [-0.0358 -0.0075  0.0387 -0.001  -0.0008]]\n",
      "mean_state_value -0.005887498343507638\n",
      "episode 180/600\n",
      "p1 0.7448000000000001 p0 0.06379999999999997\n",
      "trajectorySteps 70\n",
      "[[10  1  1  1  0]\n",
      " [ 2  2  0  1  1]\n",
      " [17  7  0  0  2]\n",
      " [15  2  2  0  1]\n",
      " [ 1  0  1  1  2]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️⬇️🔄\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0241 -0.0464 -0.0319 -0.012  -0.0198]\n",
      " [-0.0547 -0.0228 -0.0111 -0.0097 -0.0023]\n",
      " [-0.0248 -0.0698  0.0238 -0.0096 -0.0015]\n",
      " [-0.0519  0.0497  0.1747  0.0058 -0.0043]\n",
      " [-0.0358 -0.0074  0.0396 -0.0009 -0.0008]]\n",
      "mean_state_value -0.005905082949196814\n",
      "episode 181/600\n",
      "p1 0.7456 p0 0.06359999999999996\n",
      "trajectorySteps 113\n",
      "[[ 9  1  2  1  0]\n",
      " [ 8  0  1  9 40]\n",
      " [15  2  0  7 11]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  2  1]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0243 -0.0463 -0.0318 -0.012  -0.0197]\n",
      " [-0.0546 -0.0227 -0.0111 -0.0103 -0.0025]\n",
      " [-0.0249 -0.0696  0.0239 -0.0095 -0.0015]\n",
      " [-0.0517  0.0498  0.1764  0.0059 -0.0043]\n",
      " [-0.0357 -0.0074  0.0405 -0.001  -0.0008]]\n",
      "mean_state_value -0.005804074432881474\n",
      "episode 182/600\n",
      "p1 0.7464000000000002 p0 0.06339999999999997\n",
      "trajectorySteps 32\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [25  1  1  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0242 -0.0462 -0.0317 -0.0119 -0.0196]\n",
      " [-0.0545 -0.0226 -0.011  -0.0103 -0.0025]\n",
      " [-0.0251 -0.07    0.0248 -0.0095 -0.0015]\n",
      " [-0.0516  0.0499  0.1767  0.0059 -0.0042]\n",
      " [-0.0356 -0.0074  0.0406 -0.001  -0.0008]]\n",
      "mean_state_value -0.005729696352205482\n",
      "episode 183/600\n",
      "p1 0.7472000000000001 p0 0.06319999999999996\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [4 3 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "🔄⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0242 -0.0461 -0.0316 -0.0119 -0.0196]\n",
      " [-0.0544 -0.0226 -0.011  -0.0102 -0.0025]\n",
      " [-0.0251 -0.0704  0.0248 -0.0095 -0.0015]\n",
      " [-0.0515  0.0508  0.1784  0.0059 -0.0042]\n",
      " [-0.0355 -0.0073  0.0406 -0.001  -0.0008]]\n",
      "mean_state_value -0.005603379843907814\n",
      "episode 184/600\n",
      "p1 0.7480000000000001 p0 0.06299999999999997\n",
      "trajectorySteps 238\n",
      "[[  5   3   3   2   2]\n",
      " [ 31   5   3   3  17]\n",
      " [  5   2   0   7 139]\n",
      " [  0   0   2   1   8]\n",
      " [  0   0   0   0   0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0242 -0.046  -0.0321 -0.0119 -0.0195]\n",
      " [-0.0563 -0.0238 -0.0122 -0.0102 -0.0025]\n",
      " [-0.0252 -0.0702  0.0249 -0.0101 -0.0021]\n",
      " [-0.0513  0.0509  0.1801  0.0068 -0.0042]\n",
      " [-0.0354 -0.0073  0.0407 -0.001  -0.0008]]\n",
      "mean_state_value -0.005715441800705181\n",
      "episode 185/600\n",
      "p1 0.7488000000000001 p0 0.06279999999999997\n",
      "trajectorySteps 6\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0242 -0.0459 -0.032  -0.0118 -0.0195]\n",
      " [-0.0561 -0.0237 -0.0122 -0.0102 -0.0025]\n",
      " [-0.0251 -0.07    0.0249 -0.01   -0.002 ]\n",
      " [-0.0517  0.0519  0.1817  0.0068 -0.0042]\n",
      " [-0.0353 -0.0073  0.0408 -0.0009 -0.0008]]\n",
      "mean_state_value -0.005579560096154028\n",
      "episode 186/600\n",
      "p1 0.7496 p0 0.06259999999999996\n",
      "trajectorySteps 76\n",
      "[[ 3  1  2  1  2]\n",
      " [ 2  0  1  1 15]\n",
      " [11  3  0  6 20]\n",
      " [ 1  0  2  1  1]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0242 -0.0458 -0.0325 -0.0118 -0.0194]\n",
      " [-0.0561 -0.0236 -0.0121 -0.0101 -0.0025]\n",
      " [-0.0252 -0.0698  0.025  -0.0106 -0.0022]\n",
      " [-0.0516  0.052   0.1834  0.0068 -0.0042]\n",
      " [-0.0352 -0.0073  0.0417 -0.0009 -0.0008]]\n",
      "mean_state_value -0.005488142643416834\n",
      "episode 187/600\n",
      "p1 0.7504000000000002 p0 0.06239999999999997\n",
      "trajectorySteps 63\n",
      "[[ 1  1  1  1  2]\n",
      " [ 3  0  0  1  3]\n",
      " [39  3  0  0  1]\n",
      " [ 1  0  2  1  1]\n",
      " [ 0  0  0  1  1]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0241 -0.0456 -0.0325 -0.0118 -0.0193]\n",
      " [-0.0559 -0.0236 -0.0121 -0.0101 -0.0025]\n",
      " [-0.0253 -0.0697  0.025  -0.0106 -0.0022]\n",
      " [-0.0515  0.0521  0.1851  0.0077 -0.0042]\n",
      " [-0.0351 -0.0072  0.0417 -0.0015 -0.0008]]\n",
      "mean_state_value -0.0053643624905353226\n",
      "episode 188/600\n",
      "p1 0.7512000000000001 p0 0.062199999999999964\n",
      "trajectorySteps 37\n",
      "[[5 1 1 1 0]\n",
      " [1 0 0 1 2]\n",
      " [7 0 0 0 2]\n",
      " [6 1 2 0 2]\n",
      " [0 1 1 2 1]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0241 -0.0455 -0.0324 -0.0117 -0.0193]\n",
      " [-0.0558 -0.0235 -0.0121 -0.0101 -0.0025]\n",
      " [-0.0254 -0.0695  0.0251 -0.0106 -0.0022]\n",
      " [-0.0514  0.053   0.1855  0.0077 -0.0042]\n",
      " [-0.035  -0.0078  0.0412 -0.0015 -0.0008]]\n",
      "mean_state_value -0.005316833715444933\n",
      "episode 189/600\n",
      "p1 0.7520000000000001 p0 0.061999999999999965\n",
      "trajectorySteps 137\n",
      "[[ 3  1  1  1  0]\n",
      " [46  4  0  1  1]\n",
      " [56 12  0  0  1]\n",
      " [ 6  0  2  1  1]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0241 -0.0454 -0.0323 -0.0117 -0.0192]\n",
      " [-0.0576 -0.0235 -0.012  -0.01   -0.0025]\n",
      " [-0.0257 -0.07    0.0251 -0.0105 -0.0022]\n",
      " [-0.0513  0.0531  0.1872  0.0086 -0.0048]\n",
      " [-0.0349 -0.0078  0.0412 -0.0015 -0.0008]]\n",
      "mean_state_value -0.005310252892509252\n",
      "episode 190/600\n",
      "p1 0.7528000000000001 p0 0.061799999999999966\n",
      "trajectorySteps 16\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [6 1 2 0 0]\n",
      " [6 0 0 0 0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0241 -0.0453 -0.0322 -0.0117 -0.0192]\n",
      " [-0.0574 -0.0235 -0.012  -0.01   -0.0024]\n",
      " [-0.0257 -0.0698  0.0252 -0.0105 -0.0022]\n",
      " [-0.0518  0.054   0.1888  0.0086 -0.0048]\n",
      " [-0.035  -0.0078  0.0413 -0.0015 -0.0008]]\n",
      "mean_state_value -0.005182244901461479\n",
      "episode 191/600\n",
      "p1 0.7536000000000002 p0 0.06159999999999997\n",
      "trajectorySteps 48\n",
      "[[ 5  1  1  3  0]\n",
      " [ 1  0  1  3  1]\n",
      " [21  0  0  0  1]\n",
      " [ 0  0  2  0  2]\n",
      " [ 0  0  1  1  4]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.024  -0.0452 -0.0321 -0.0117 -0.0191]\n",
      " [-0.0572 -0.0234 -0.012  -0.0106 -0.0024]\n",
      " [-0.0258 -0.0696  0.0252 -0.0105 -0.0022]\n",
      " [-0.0516  0.0542  0.1905  0.0086 -0.0048]\n",
      " [-0.0349 -0.0078  0.0422 -0.0014 -0.0009]]\n",
      "mean_state_value -0.005060882726596486\n",
      "episode 192/600\n",
      "p1 0.7544000000000002 p0 0.06139999999999997\n",
      "trajectorySteps 50\n",
      "[[13  1  1  1  0]\n",
      " [14  0  0  1  1]\n",
      " [ 8  1  0  0  2]\n",
      " [ 3  0  2  1  1]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄🔄⏬➡️⬆️\n",
      "⬇️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0242 -0.045  -0.032  -0.0117 -0.019 ]\n",
      " [-0.0573 -0.0233 -0.0119 -0.0106 -0.0024]\n",
      " [-0.0259 -0.0694  0.0253 -0.0104 -0.0022]\n",
      " [-0.0515  0.0543  0.1922  0.0095 -0.0054]\n",
      " [-0.0348 -0.0077  0.0423 -0.0014 -0.0009]]\n",
      "mean_state_value -0.0049470614329642565\n",
      "episode 193/600\n",
      "p1 0.7552000000000001 p0 0.06119999999999996\n",
      "trajectorySteps 101\n",
      "[[10  1  1  1  3]\n",
      " [ 2  0  0  2 28]\n",
      " [ 6  0  1  3 27]\n",
      " [ 0  0  2  0  6]\n",
      " [ 0  0  2  3  3]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️🔄\n",
      "⬇️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0244 -0.0449 -0.0319 -0.0116 -0.019 ]\n",
      " [-0.0572 -0.0233 -0.0119 -0.0105 -0.0026]\n",
      " [-0.0259 -0.0692  0.0253 -0.011  -0.0023]\n",
      " [-0.0513  0.0544  0.1939  0.0095 -0.0054]\n",
      " [-0.0347 -0.0077  0.0432 -0.0014 -0.001 ]]\n",
      "mean_state_value -0.004833192363209928\n",
      "episode 194/600\n",
      "p1 0.7560000000000001 p0 0.060999999999999964\n",
      "trajectorySteps 79\n",
      "[[16  2  1  2  5]\n",
      " [ 3  0  0  3 36]\n",
      " [ 1  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  2  2  2]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0245 -0.0448 -0.0318 -0.0116 -0.019 ]\n",
      " [-0.0571 -0.0232 -0.0118 -0.0105 -0.0027]\n",
      " [-0.0259 -0.0689  0.0254 -0.011  -0.0023]\n",
      " [-0.0512  0.0545  0.1956  0.0095 -0.0054]\n",
      " [-0.0346 -0.0077  0.0441 -0.0013 -0.0011]]\n",
      "mean_state_value -0.004694616123231452\n",
      "episode 195/600\n",
      "p1 0.7568000000000001 p0 0.060799999999999965\n",
      "trajectorySteps 27\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  2  1  0  0]\n",
      " [11  0  2  0  0]\n",
      " [ 9  0  0  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0245 -0.0447 -0.0317 -0.0116 -0.019 ]\n",
      " [-0.0569 -0.0231 -0.0118 -0.0105 -0.0027]\n",
      " [-0.0259 -0.0693  0.0263 -0.0109 -0.0023]\n",
      " [-0.0511  0.0546  0.1973  0.0095 -0.0053]\n",
      " [-0.0346 -0.0076  0.0442 -0.0013 -0.001 ]]\n",
      "mean_state_value -0.00456296161247378\n",
      "episode 196/600\n",
      "p1 0.7576000000000002 p0 0.060599999999999966\n",
      "trajectorySteps 117\n",
      "[[41  1  2  2  0]\n",
      " [ 4  0  0  1  1]\n",
      " [49  4  0  0  2]\n",
      " [ 4  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️🔄⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0251 -0.0446 -0.0317 -0.0115 -0.0189]\n",
      " [-0.0568 -0.023  -0.0118 -0.0104 -0.0027]\n",
      " [-0.0263 -0.0692  0.0263 -0.0109 -0.0023]\n",
      " [-0.051   0.0547  0.199   0.0095 -0.0053]\n",
      " [-0.0345 -0.0076  0.0451 -0.0012 -0.001 ]]\n",
      "mean_state_value -0.004455220547969275\n",
      "episode 197/600\n",
      "p1 0.7584000000000002 p0 0.06039999999999997\n",
      "trajectorySteps 26\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 5  0  0  0  0]\n",
      " [17  1  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0251 -0.0444 -0.0316 -0.0115 -0.0189]\n",
      " [-0.0567 -0.023  -0.0117 -0.0104 -0.0027]\n",
      " [-0.0264 -0.0689  0.0264 -0.0109 -0.0023]\n",
      " [-0.0515  0.0556  0.2007  0.0096 -0.0053]\n",
      " [-0.0345 -0.0076  0.0452 -0.0012 -0.001 ]]\n",
      "mean_state_value -0.00432223149550715\n",
      "episode 198/600\n",
      "p1 0.7592000000000001 p0 0.06019999999999996\n",
      "trajectorySteps 16\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [5 0 0 0 0]\n",
      " [5 0 2 0 0]\n",
      " [2 1 1 0 0]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "➡️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.0443 -0.0315 -0.0115 -0.0188]\n",
      " [-0.0565 -0.0229 -0.0117 -0.0104 -0.0027]\n",
      " [-0.0264 -0.0687  0.0264 -0.0108 -0.0023]\n",
      " [-0.0515  0.0557  0.2024  0.0096 -0.0053]\n",
      " [-0.035  -0.0075  0.0461 -0.0012 -0.001 ]]\n",
      "mean_state_value -0.004185760380939789\n",
      "episode 199/600\n",
      "p1 0.7600000000000001 p0 0.05999999999999996\n",
      "trajectorySteps 47\n",
      "[[ 2  1  1  1  0]\n",
      " [ 6  4  1  2  3]\n",
      " [ 1 16  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  3]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.0442 -0.0314 -0.0114 -0.0187]\n",
      " [-0.0575 -0.0229 -0.0117 -0.0109 -0.0027]\n",
      " [-0.0263 -0.0697  0.0265 -0.0108 -0.0023]\n",
      " [-0.0513  0.0558  0.2041  0.0096 -0.0053]\n",
      " [-0.0349 -0.0075  0.047  -0.0012 -0.0011]]\n",
      "mean_state_value -0.004147767610842362\n",
      "episode 200/600\n",
      "p1 0.7608000000000001 p0 0.059799999999999964\n",
      "trajectorySteps 11\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [4 3 2 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.044  -0.0313 -0.0114 -0.0187]\n",
      " [-0.0573 -0.0229 -0.0116 -0.0109 -0.0027]\n",
      " [-0.0264 -0.0701  0.0268 -0.0108 -0.0022]\n",
      " [-0.0511  0.0559  0.2058  0.0096 -0.0053]\n",
      " [-0.0348 -0.0075  0.0471 -0.0012 -0.0011]]\n",
      "mean_state_value -0.0040332029193148695\n",
      "episode 201/600\n",
      "p1 0.7616000000000002 p0 0.059599999999999966\n",
      "trajectorySteps 21\n",
      "[[3 1 1 1 0]\n",
      " [2 0 0 1 1]\n",
      " [3 0 0 0 1]\n",
      " [0 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "🔄➡️⬅️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.0439 -0.0312 -0.0114 -0.0186]\n",
      " [-0.0571 -0.0228 -0.0116 -0.0108 -0.0027]\n",
      " [-0.0264 -0.0699  0.0269 -0.0107 -0.0022]\n",
      " [-0.051   0.0561  0.2063  0.0096 -0.0053]\n",
      " [-0.0347 -0.0074  0.0481 -0.0011 -0.0011]]\n",
      "mean_state_value -0.003928848805347223\n",
      "episode 202/600\n",
      "p1 0.7624000000000002 p0 0.05939999999999997\n",
      "trajectorySteps 33\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [9 2 0 0 0]\n",
      " [8 0 2 0 0]\n",
      " [8 1 2 1 0]]\n",
      "🔄➡️⬅️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.0438 -0.0311 -0.0113 -0.0185]\n",
      " [-0.057  -0.0227 -0.0115 -0.0108 -0.0026]\n",
      " [-0.0265 -0.0697  0.0269 -0.0107 -0.0022]\n",
      " [-0.051   0.0562  0.208   0.0096 -0.0053]\n",
      " [-0.0353 -0.0074  0.049  -0.0011 -0.0011]]\n",
      "mean_state_value -0.0038010631046882\n",
      "episode 203/600\n",
      "p1 0.7632000000000001 p0 0.05919999999999996\n",
      "trajectorySteps 29\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [15  0  0  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [ 5  1  3  1  0]]\n",
      "🔄➡️⬅️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0249 -0.0436 -0.031  -0.0113 -0.0185]\n",
      " [-0.0568 -0.0227 -0.0115 -0.0108 -0.0026]\n",
      " [-0.0266 -0.0695  0.027  -0.0107 -0.0022]\n",
      " [-0.0509  0.0563  0.2084  0.0096 -0.0053]\n",
      " [-0.0359 -0.0073  0.0499 -0.001  -0.0011]]\n",
      "mean_state_value -0.003717444973396271\n",
      "episode 204/600\n",
      "p1 0.7640000000000001 p0 0.05899999999999996\n",
      "trajectorySteps 8\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 2 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️⬅️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0249 -0.0435 -0.0309 -0.0113 -0.0184]\n",
      " [-0.0566 -0.0226 -0.0115 -0.0107 -0.0026]\n",
      " [-0.0266 -0.0699  0.027  -0.0106 -0.0022]\n",
      " [-0.0507  0.0572  0.2102  0.0097 -0.0053]\n",
      " [-0.0358 -0.0073  0.0499 -0.001  -0.0011]]\n",
      "mean_state_value -0.0035786804972238483\n",
      "episode 205/600\n",
      "p1 0.7648000000000001 p0 0.05879999999999996\n",
      "trajectorySteps 82\n",
      "[[30  4  4  0  0]\n",
      " [ 3  0  2  2  3]\n",
      " [19  1  2  1  3]\n",
      " [ 2  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0251 -0.0434 -0.0319 -0.0112 -0.0184]\n",
      " [-0.0565 -0.0225 -0.0126 -0.0107 -0.0027]\n",
      " [-0.0267 -0.0697  0.0271 -0.0106 -0.0022]\n",
      " [-0.0506  0.0573  0.2119  0.0097 -0.0052]\n",
      " [-0.0357 -0.0073  0.0509 -0.001  -0.0011]]\n",
      "mean_state_value -0.0035268202927126314\n",
      "episode 206/600\n",
      "p1 0.7656000000000002 p0 0.058599999999999965\n",
      "trajectorySteps 11\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [1 0 2 0 0]\n",
      " [5 1 1 0 0]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.0433 -0.0318 -0.0112 -0.0183]\n",
      " [-0.0563 -0.0225 -0.0126 -0.0107 -0.0027]\n",
      " [-0.0266 -0.0694  0.0271 -0.0105 -0.0022]\n",
      " [-0.0504  0.0574  0.2136  0.0097 -0.0052]\n",
      " [-0.0362 -0.0072  0.0518 -0.001  -0.0011]]\n",
      "mean_state_value -0.003382800104882907\n",
      "episode 207/600\n",
      "p1 0.7664000000000002 p0 0.058399999999999966\n",
      "trajectorySteps 14\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [6 5 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.0432 -0.0317 -0.0112 -0.0182]\n",
      " [-0.0561 -0.0224 -0.0125 -0.0106 -0.0027]\n",
      " [-0.0267 -0.0698  0.0272 -0.0105 -0.0022]\n",
      " [-0.0503  0.0584  0.2153  0.0097 -0.0052]\n",
      " [-0.0361 -0.0072  0.0519 -0.001  -0.0011]]\n",
      "mean_state_value -0.0032469344696881464\n",
      "episode 208/600\n",
      "p1 0.7672000000000001 p0 0.05819999999999996\n",
      "trajectorySteps 19\n",
      "[[2 1 1 1 0]\n",
      " [3 0 0 1 1]\n",
      " [4 0 0 0 1]\n",
      " [0 0 2 1 1]\n",
      " [0 0 0 0 0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.043  -0.0316 -0.0111 -0.0182]\n",
      " [-0.056  -0.0223 -0.0125 -0.0106 -0.0027]\n",
      " [-0.0267 -0.0696  0.0272 -0.0105 -0.0022]\n",
      " [-0.0501  0.0585  0.2158  0.0106 -0.0058]\n",
      " [-0.036  -0.0071  0.052  -0.001  -0.0011]]\n",
      "mean_state_value -0.0031552486813411795\n",
      "episode 209/600\n",
      "p1 0.7680000000000001 p0 0.05799999999999996\n",
      "trajectorySteps 208\n",
      "[[44  3  0  0  0]\n",
      " [46  3  0  0  0]\n",
      " [53  8  0  0  0]\n",
      " [44  0  2  0  0]\n",
      " [ 3  1  1  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0254 -0.043  -0.0315 -0.0111 -0.0181]\n",
      " [-0.0573 -0.0223 -0.0124 -0.0106 -0.0026]\n",
      " [-0.0271 -0.0701  0.0273 -0.0104 -0.0022]\n",
      " [-0.0504  0.0586  0.2175  0.0106 -0.0057]\n",
      " [-0.0365 -0.0071  0.0529 -0.001  -0.0011]]\n",
      "mean_state_value -0.0031609055516268135\n",
      "episode 210/600\n",
      "p1 0.7688000000000001 p0 0.05779999999999996\n",
      "trajectorySteps 47\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 4  0  2  0  0]\n",
      " [38  1  1  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0254 -0.0428 -0.0314 -0.0111 -0.0181]\n",
      " [-0.0571 -0.0223 -0.0124 -0.0105 -0.0026]\n",
      " [-0.0271 -0.0698  0.0273 -0.0104 -0.0022]\n",
      " [-0.0504  0.0587  0.2193  0.0107 -0.0057]\n",
      " [-0.0373 -0.007   0.0539 -0.001  -0.0011]]\n",
      "mean_state_value -0.003027786168955554\n",
      "episode 211/600\n",
      "p1 0.7696000000000002 p0 0.057599999999999964\n",
      "trajectorySteps 8\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [4 0 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0253 -0.0427 -0.0313 -0.011  -0.018 ]\n",
      " [-0.057  -0.0222 -0.0124 -0.0105 -0.0026]\n",
      " [-0.0272 -0.0696  0.0274 -0.0104 -0.0022]\n",
      " [-0.0507  0.0597  0.221   0.0107 -0.0057]\n",
      " [-0.0372 -0.007   0.054  -0.001  -0.0011]]\n",
      "mean_state_value -0.0028840277650388057\n",
      "episode 212/600\n",
      "p1 0.7704000000000002 p0 0.057399999999999965\n",
      "trajectorySteps 111\n",
      "[[ 0  0  0  0  0]\n",
      " [ 4  0  0  0  0]\n",
      " [95  8  0  0  0]\n",
      " [ 0  2  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0253 -0.0425 -0.0312 -0.011  -0.0179]\n",
      " [-0.0569 -0.0221 -0.0123 -0.0105 -0.0026]\n",
      " [-0.028  -0.0701  0.0274 -0.0103 -0.0022]\n",
      " [-0.0506  0.0601  0.2228  0.0107 -0.0057]\n",
      " [-0.0371 -0.0069  0.0541 -0.0009 -0.0011]]\n",
      "mean_state_value -0.0028032825902345637\n",
      "episode 213/600\n",
      "p1 0.7712000000000001 p0 0.05719999999999996\n",
      "trajectorySteps 32\n",
      "[[0 0 0 0 0]\n",
      " [0 0 1 1 6]\n",
      " [6 4 1 1 6]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0252 -0.0424 -0.0311 -0.011  -0.0179]\n",
      " [-0.0567 -0.0221 -0.0123 -0.0104 -0.0026]\n",
      " [-0.0282 -0.0704  0.0269 -0.0103 -0.0022]\n",
      " [-0.0504  0.0602  0.2245  0.0107 -0.0057]\n",
      " [-0.037  -0.0069  0.055  -0.0009 -0.001 ]]\n",
      "mean_state_value -0.002686953285991278\n",
      "episode 214/600\n",
      "p1 0.7720000000000001 p0 0.05699999999999996\n",
      "trajectorySteps 57\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [46  5  1  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0251 -0.0423 -0.031  -0.0109 -0.0178]\n",
      " [-0.0565 -0.022  -0.0122 -0.0104 -0.0026]\n",
      " [-0.0283 -0.0708  0.0279 -0.0103 -0.0022]\n",
      " [-0.0503  0.0603  0.2263  0.0107 -0.0056]\n",
      " [-0.0369 -0.0069  0.0551 -0.0009 -0.001 ]]\n",
      "mean_state_value -0.0025496568940644474\n",
      "episode 215/600\n",
      "p1 0.7728000000000002 p0 0.05679999999999996\n",
      "trajectorySteps 68\n",
      "[[ 6  0  0  0  0]\n",
      " [27  0  0  0  0]\n",
      " [28  1  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 1  1  0  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "🔄⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0252 -0.0421 -0.0309 -0.0109 -0.0177]\n",
      " [-0.0566 -0.0219 -0.0122 -0.0104 -0.0026]\n",
      " [-0.0284 -0.0706  0.0279 -0.0102 -0.0022]\n",
      " [-0.0502  0.0613  0.228   0.0107 -0.0056]\n",
      " [-0.0373 -0.0074  0.0552 -0.0009 -0.001 ]]\n",
      "mean_state_value -0.0024469224018410767\n",
      "episode 216/600\n",
      "p1 0.7736000000000002 p0 0.05659999999999996\n",
      "trajectorySteps 20\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [7 1 2 0 0]\n",
      " [8 0 0 0 0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "🔄⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0251 -0.042  -0.0308 -0.0108 -0.0177]\n",
      " [-0.0564 -0.0218 -0.0121 -0.0103 -0.0026]\n",
      " [-0.0284 -0.0704  0.028  -0.0102 -0.0022]\n",
      " [-0.0506  0.0623  0.2298  0.0107 -0.0056]\n",
      " [-0.0375 -0.0074  0.0553 -0.0009 -0.001 ]]\n",
      "mean_state_value -0.002315384297430975\n",
      "episode 217/600\n",
      "p1 0.7744000000000002 p0 0.056399999999999964\n",
      "trajectorySteps 9\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 1 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [1 0 0 0 0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "🔄⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0251 -0.0418 -0.0307 -0.0108 -0.0176]\n",
      " [-0.0563 -0.0218 -0.0121 -0.0103 -0.0026]\n",
      " [-0.0284 -0.0701  0.0281 -0.0102 -0.0021]\n",
      " [-0.051   0.0633  0.2315  0.0107 -0.0056]\n",
      " [-0.0374 -0.0074  0.0553 -0.0009 -0.001 ]]\n",
      "mean_state_value -0.0021693024526187013\n",
      "episode 218/600\n",
      "p1 0.7752000000000001 p0 0.05619999999999996\n",
      "trajectorySteps 31\n",
      "[[8 1 1 1 3]\n",
      " [5 1 0 0 2]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0251 -0.0417 -0.0306 -0.0108 -0.0176]\n",
      " [-0.0566 -0.0218 -0.0121 -0.0102 -0.0026]\n",
      " [-0.0284 -0.0699  0.0281 -0.0101 -0.0021]\n",
      " [-0.0509  0.0634  0.2333  0.0108 -0.0056]\n",
      " [-0.0373 -0.0073  0.0563 -0.0008 -0.001 ]]\n",
      "mean_state_value -0.0020319492596328373\n",
      "episode 219/600\n",
      "p1 0.7760000000000001 p0 0.05599999999999996\n",
      "trajectorySteps 18\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [6 0 2 0 0]\n",
      " [6 1 1 0 0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬆️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0251 -0.0416 -0.0305 -0.0107 -0.0175]\n",
      " [-0.0565 -0.0217 -0.012  -0.0102 -0.0026]\n",
      " [-0.0284 -0.0697  0.0282 -0.0101 -0.0021]\n",
      " [-0.0508  0.0635  0.2351  0.0108 -0.0056]\n",
      " [-0.0379 -0.0073  0.0573 -0.0008 -0.001 ]]\n",
      "mean_state_value -0.001892619788804076\n",
      "episode 220/600\n",
      "p1 0.7768000000000002 p0 0.05579999999999996\n",
      "trajectorySteps 118\n",
      "[[11  0  0  0  1]\n",
      " [ 2  1  1  5 42]\n",
      " [ 3  1  0  2 37]\n",
      " [ 5  0  2  0  1]\n",
      " [ 0  0  1  1  2]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️🔄🔄\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0253 -0.0415 -0.0304 -0.0107 -0.0175]\n",
      " [-0.0568 -0.0222 -0.012  -0.0102 -0.0028]\n",
      " [-0.0284 -0.0695  0.0282 -0.0101 -0.0022]\n",
      " [-0.0507  0.0637  0.2368  0.0108 -0.0056]\n",
      " [-0.0378 -0.0072  0.0583 -0.0008 -0.001 ]]\n",
      "mean_state_value -0.0017859348025605429\n",
      "episode 221/600\n",
      "p1 0.7776000000000002 p0 0.05559999999999996\n",
      "trajectorySteps 17\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [4 0 2 0 0]\n",
      " [4 1 2 1 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️🔄🔄\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0252 -0.0413 -0.0303 -0.0107 -0.0174]\n",
      " [-0.0566 -0.0221 -0.0119 -0.0101 -0.0028]\n",
      " [-0.0284 -0.0692  0.0283 -0.01   -0.0022]\n",
      " [-0.0506  0.0638  0.2386  0.0108 -0.0056]\n",
      " [-0.0383 -0.0072  0.0592 -0.0007 -0.001 ]]\n",
      "mean_state_value -0.001640335410971733\n",
      "episode 222/600\n",
      "p1 0.7784000000000002 p0 0.05539999999999996\n",
      "trajectorySteps 21\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [8 0 2 0 0]\n",
      " [7 1 1 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️🔄🔄\n",
      "➡️⬅️⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0252 -0.0412 -0.0302 -0.0106 -0.0174]\n",
      " [-0.0564 -0.022  -0.0119 -0.0101 -0.0028]\n",
      " [-0.0284 -0.069   0.0283 -0.01   -0.0022]\n",
      " [-0.0504  0.0639  0.2404  0.0108 -0.0055]\n",
      " [-0.0389 -0.0071  0.0602 -0.0007 -0.001 ]]\n",
      "mean_state_value -0.0014964695499964712\n",
      "episode 223/600\n",
      "p1 0.7792000000000001 p0 0.05519999999999996\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [4 2 0 0 0]\n",
      " [1 0 2 0 0]\n",
      " [1 1 1 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️🔄🔄\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0251 -0.041  -0.0301 -0.0106 -0.0173]\n",
      " [-0.0563 -0.022  -0.0119 -0.0101 -0.0028]\n",
      " [-0.0285 -0.0688  0.0284 -0.0099 -0.0022]\n",
      " [-0.0503  0.064   0.2421  0.0108 -0.0055]\n",
      " [-0.0393 -0.007   0.0612 -0.0007 -0.001 ]]\n",
      "mean_state_value -0.0013474109732278063\n",
      "episode 224/600\n",
      "p1 0.7800000000000001 p0 0.05499999999999996\n",
      "trajectorySteps 52\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 4  1  0  0  0]\n",
      " [41  3  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️🔄🔄\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.0409 -0.03   -0.0105 -0.0172]\n",
      " [-0.0561 -0.0219 -0.0118 -0.01   -0.0028]\n",
      " [-0.0284 -0.0686  0.0284 -0.0099 -0.0022]\n",
      " [-0.0513  0.0645  0.2439  0.0108 -0.0055]\n",
      " [-0.0393 -0.007   0.0613 -0.0007 -0.001 ]]\n",
      "mean_state_value -0.001245448244076285\n",
      "episode 225/600\n",
      "p1 0.7808000000000002 p0 0.05479999999999996\n",
      "trajectorySteps 27\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 4 0]\n",
      " [3 1 0 1 1]\n",
      " [0 0 2 1 3]\n",
      " [0 0 1 3 2]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️🔄\n",
      "🔄⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.0408 -0.0299 -0.0105 -0.0172]\n",
      " [-0.0559 -0.0218 -0.0118 -0.01   -0.0028]\n",
      " [-0.0284 -0.0684  0.0285 -0.0099 -0.0022]\n",
      " [-0.0511  0.0646  0.2457  0.0109 -0.0055]\n",
      " [-0.0391 -0.0069  0.0623 -0.0012 -0.001 ]]\n",
      "mean_state_value -0.001096889033252225\n",
      "episode 226/600\n",
      "p1 0.7816000000000002 p0 0.05459999999999996\n",
      "trajectorySteps 29\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [23  2  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️🔄\n",
      "⬇️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0249 -0.0406 -0.0298 -0.0105 -0.0171]\n",
      " [-0.0557 -0.0217 -0.0117 -0.01   -0.0028]\n",
      " [-0.0285 -0.0687  0.0285 -0.0098 -0.0022]\n",
      " [-0.051   0.0656  0.2475  0.0109 -0.0055]\n",
      " [-0.039  -0.0069  0.0624 -0.0012 -0.001 ]]\n",
      "mean_state_value -0.0009512171076333604\n",
      "episode 227/600\n",
      "p1 0.7824000000000002 p0 0.05439999999999996\n",
      "trajectorySteps 9\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [3 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️🔄\n",
      "➡️⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0249 -0.0405 -0.0297 -0.0104 -0.017 ]\n",
      " [-0.0555 -0.0217 -0.0117 -0.0099 -0.0027]\n",
      " [-0.0285 -0.0684  0.0286 -0.0098 -0.0022]\n",
      " [-0.0514  0.0666  0.2493  0.0109 -0.0055]\n",
      " [-0.0389 -0.0069  0.0624 -0.0012 -0.001 ]]\n",
      "mean_state_value -0.0008020227916600868\n",
      "episode 228/600\n",
      "p1 0.7832000000000001 p0 0.05419999999999996\n",
      "trajectorySteps 188\n",
      "[[142   6   0   0   0]\n",
      " [ 12   0   0   0   0]\n",
      " [ 14  11   1   0   0]\n",
      " [  0   0   2   0   0]\n",
      " [  0   0   0   0   0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏪⏩️➡️🔄\n",
      "⬇️🔄⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0256 -0.0404 -0.0296 -0.0104 -0.017 ]\n",
      " [-0.0555 -0.0216 -0.0117 -0.0099 -0.0027]\n",
      " [-0.0286 -0.0689  0.0296 -0.0098 -0.0021]\n",
      " [-0.0512  0.0667  0.2498  0.0109 -0.0055]\n",
      " [-0.0388 -0.0069  0.0625 -0.0011 -0.001 ]]\n",
      "mean_state_value -0.0007409408344733618\n",
      "episode 229/600\n",
      "p1 0.7840000000000001 p0 0.05399999999999996\n",
      "trajectorySteps 28\n",
      "[[ 0  0  0  0  0]\n",
      " [ 4  1  0  0  0]\n",
      " [12  0  0  0  0]\n",
      " [ 8  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏬⏩️➡️🔄\n",
      "🔄🔄⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0255 -0.0402 -0.0295 -0.0104 -0.0169]\n",
      " [-0.0559 -0.0216 -0.0116 -0.0099 -0.0027]\n",
      " [-0.0287 -0.0686  0.0297 -0.0097 -0.0021]\n",
      " [-0.0516  0.0678  0.2504  0.0109 -0.0055]\n",
      " [-0.0387 -0.0068  0.0626 -0.0011 -0.001 ]]\n",
      "mean_state_value -0.0006679887029232729\n",
      "episode 230/600\n",
      "p1 0.7848000000000002 p0 0.05379999999999996\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 8 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏬⏩️➡️🔄\n",
      "🔄⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0254 -0.0401 -0.0294 -0.0103 -0.0169]\n",
      " [-0.0557 -0.0215 -0.0116 -0.0098 -0.0027]\n",
      " [-0.0287 -0.069   0.0297 -0.0097 -0.0021]\n",
      " [-0.0515  0.0688  0.251   0.0109 -0.0054]\n",
      " [-0.0386 -0.0068  0.0627 -0.0011 -0.001 ]]\n",
      "mean_state_value -0.0005655054165479511\n",
      "episode 231/600\n",
      "p1 0.7856000000000002 p0 0.05359999999999996\n",
      "trajectorySteps 6\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏬⏩️➡️🔄\n",
      "🔄⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0254 -0.04   -0.0293 -0.0103 -0.0168]\n",
      " [-0.0555 -0.0214 -0.0115 -0.0098 -0.0027]\n",
      " [-0.0286 -0.0693  0.0302 -0.0097 -0.0021]\n",
      " [-0.0513  0.0689  0.2528  0.0109 -0.0054]\n",
      " [-0.0384 -0.0068  0.0628 -0.0011 -0.001 ]]\n",
      "mean_state_value -0.0004298151943064462\n",
      "episode 232/600\n",
      "p1 0.7864000000000002 p0 0.05339999999999996\n",
      "trajectorySteps 18\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [11  2  1  1  0]\n",
      " [ 0  0  2  1  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏬⏩️➡️🔄\n",
      "🔄⬅️⏬🔄⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0253 -0.0398 -0.0292 -0.0103 -0.0167]\n",
      " [-0.0553 -0.0213 -0.0115 -0.0098 -0.0027]\n",
      " [-0.0287 -0.0695  0.0302 -0.0102 -0.0021]\n",
      " [-0.0511  0.069   0.2546  0.0119 -0.0054]\n",
      " [-0.0383 -0.0067  0.0629 -0.0011 -0.001 ]]\n",
      "mean_state_value -0.0003007947070502398\n",
      "episode 233/600\n",
      "p1 0.7872000000000001 p0 0.053199999999999956\n",
      "trajectorySteps 126\n",
      "[[ 1  1  2  2  0]\n",
      " [15  1  1  2  5]\n",
      " [77  5  1  2  1]\n",
      " [ 6  0  2  0  0]\n",
      " [ 2  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️🔄\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0253 -0.0397 -0.0291 -0.0102 -0.0167]\n",
      " [-0.0557 -0.0213 -0.0114 -0.0102 -0.0027]\n",
      " [-0.0291 -0.0694  0.0312 -0.0106 -0.0021]\n",
      " [-0.0511  0.0691  0.2564  0.0119 -0.0054]\n",
      " [-0.0382 -0.0067  0.063  -0.0011 -0.001 ]]\n",
      "mean_state_value -0.00021714785776702752\n",
      "episode 234/600\n",
      "p1 0.7880000000000001 p0 0.05299999999999996\n",
      "trajectorySteps 85\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 4  0  0  0  0]\n",
      " [ 8  0  2  0  0]\n",
      " [69  1  1  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️🔄\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0252 -0.0396 -0.029  -0.0102 -0.0166]\n",
      " [-0.0555 -0.0212 -0.0114 -0.0102 -0.0027]\n",
      " [-0.0291 -0.0692  0.0313 -0.0106 -0.0021]\n",
      " [-0.0509  0.0693  0.2582  0.012  -0.0054]\n",
      " [-0.0393 -0.0066  0.064  -0.0011 -0.001 ]]\n",
      "mean_state_value -8.979675810835655e-05\n",
      "episode 235/600\n",
      "p1 0.7888000000000002 p0 0.05279999999999996\n",
      "trajectorySteps 5\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 1 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️🔄\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0252 -0.0394 -0.0289 -0.0102 -0.0165]\n",
      " [-0.0553 -0.0212 -0.0114 -0.0102 -0.0027]\n",
      " [-0.0291 -0.0694  0.0314 -0.0106 -0.0021]\n",
      " [-0.0508  0.0703  0.26    0.012  -0.0053]\n",
      " [-0.0392 -0.0066  0.0641 -0.0011 -0.001 ]]\n",
      "mean_state_value 6.751058945981165e-05\n",
      "episode 236/600\n",
      "p1 0.7896000000000002 p0 0.05259999999999996\n",
      "trajectorySteps 40\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [17  0  0  0  0]\n",
      " [20  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0251 -0.0393 -0.0288 -0.0101 -0.0165]\n",
      " [-0.0552 -0.0211 -0.0113 -0.0101 -0.0027]\n",
      " [-0.0292 -0.0692  0.0314 -0.0105 -0.0021]\n",
      " [-0.0513  0.0713  0.2611  0.012  -0.0053]\n",
      " [-0.0391 -0.0066  0.0641 -0.0011 -0.001 ]]\n",
      "mean_state_value 0.0001833334598261679\n",
      "episode 237/600\n",
      "p1 0.7904000000000002 p0 0.05239999999999996\n",
      "trajectorySteps 26\n",
      "[[1 1 1 1 0]\n",
      " [2 0 0 1 3]\n",
      " [4 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 3 2 2]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0251 -0.0392 -0.0287 -0.0101 -0.0164]\n",
      " [-0.055  -0.021  -0.0113 -0.0101 -0.0027]\n",
      " [-0.0291 -0.069   0.0315 -0.0105 -0.0021]\n",
      " [-0.0511  0.0715  0.2617  0.012  -0.0053]\n",
      " [-0.039  -0.0065  0.0651 -0.001  -0.001 ]]\n",
      "mean_state_value 0.0003059789824689633\n",
      "episode 238/600\n",
      "p1 0.7912000000000001 p0 0.052199999999999955\n",
      "trajectorySteps 15\n",
      "[[0 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [5 0 0 0 0]\n",
      " [1 0 2 0 0]\n",
      " [1 1 2 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.039  -0.0286 -0.0101 -0.0164]\n",
      " [-0.0549 -0.0209 -0.0112 -0.0101 -0.0027]\n",
      " [-0.0292 -0.0687  0.0315 -0.0104 -0.0021]\n",
      " [-0.051   0.0716  0.2635  0.012  -0.0053]\n",
      " [-0.0393 -0.0065  0.0661 -0.001  -0.001 ]]\n",
      "mean_state_value 0.00046046005232097654\n",
      "episode 239/600\n",
      "p1 0.7920000000000001 p0 0.051999999999999956\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [1 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬆️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.0389 -0.0285 -0.01   -0.0163]\n",
      " [-0.0547 -0.0209 -0.0112 -0.01   -0.0026]\n",
      " [-0.0292 -0.0685  0.0316 -0.0104 -0.0021]\n",
      " [-0.0513  0.0726  0.2654  0.012  -0.0053]\n",
      " [-0.0392 -0.0064  0.0662 -0.0009 -0.001 ]]\n",
      "mean_state_value 0.0006176581237208015\n",
      "episode 240/600\n",
      "p1 0.7928000000000002 p0 0.05179999999999996\n",
      "trajectorySteps 59\n",
      "[[ 1  1  1  1  0]\n",
      " [ 1  0  0  1 13]\n",
      " [ 1  0  0  0 19]\n",
      " [ 0  0  2  1 16]\n",
      " [ 0  0  0  0  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️⬇️🔄\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0249 -0.0388 -0.0284 -0.01   -0.0162]\n",
      " [-0.0545 -0.0208 -0.0112 -0.01   -0.0028]\n",
      " [-0.0292 -0.0682  0.0317 -0.0104 -0.0021]\n",
      " [-0.0511  0.0727  0.2672  0.013  -0.0058]\n",
      " [-0.0391 -0.0064  0.0663 -0.0009 -0.001 ]]\n",
      "mean_state_value 0.0007686285752016377\n",
      "episode 241/600\n",
      "p1 0.7936000000000002 p0 0.05159999999999996\n",
      "trajectorySteps 119\n",
      "[[ 1  1  1  1  0]\n",
      " [ 2  0  0  4 43]\n",
      " [ 2  1  0  6 51]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬🔄⬆️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0249 -0.0386 -0.0283 -0.01   -0.0162]\n",
      " [-0.0544 -0.0207 -0.0111 -0.0099 -0.0028]\n",
      " [-0.0291 -0.068   0.0317 -0.0103 -0.0023]\n",
      " [-0.051   0.0729  0.269   0.013  -0.0057]\n",
      " [-0.039  -0.0064  0.0673 -0.0009 -0.001 ]]\n",
      "mean_state_value 0.0009337006391730163\n",
      "episode 242/600\n",
      "p1 0.7944000000000002 p0 0.05139999999999996\n",
      "trajectorySteps 196\n",
      "[[43 38  2  3  2]\n",
      " [ 9  5  1  6 39]\n",
      " [ 1  0  1  7 37]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0259 -0.0406 -0.0282 -0.0099 -0.0161]\n",
      " [-0.0542 -0.0207 -0.0116 -0.0104 -0.0028]\n",
      " [-0.0291 -0.0678  0.0328 -0.0108 -0.0024]\n",
      " [-0.0508  0.073   0.2708  0.0131 -0.0057]\n",
      " [-0.0389 -0.0063  0.0674 -0.0009 -0.001 ]]\n",
      "mean_state_value 0.0009189511289700833\n",
      "episode 243/600\n",
      "p1 0.7952000000000001 p0 0.051199999999999954\n",
      "trajectorySteps 19\n",
      "[[0 0 0 0 0]\n",
      " [4 1 1 1 1]\n",
      " [4 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0258 -0.0404 -0.0281 -0.0099 -0.016 ]\n",
      " [-0.0546 -0.0211 -0.0115 -0.0104 -0.0028]\n",
      " [-0.0291 -0.0675  0.0328 -0.0108 -0.0024]\n",
      " [-0.0506  0.0731  0.2727  0.0131 -0.0057]\n",
      " [-0.0388 -0.0063  0.0684 -0.0008 -0.001 ]]\n",
      "mean_state_value 0.001056064698347217\n",
      "episode 244/600\n",
      "p1 0.7960000000000002 p0 0.050999999999999955\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 2]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬇️⏬⏩️🔄⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0258 -0.0403 -0.028  -0.0099 -0.016 ]\n",
      " [-0.0544 -0.0211 -0.0115 -0.0103 -0.0029]\n",
      " [-0.0291 -0.0673  0.0329 -0.0107 -0.0024]\n",
      " [-0.0504  0.0732  0.2745  0.0131 -0.0057]\n",
      " [-0.0386 -0.0063  0.0695 -0.0008 -0.001 ]]\n",
      "mean_state_value 0.0012331581350588795\n",
      "episode 245/600\n",
      "p1 0.7968000000000002 p0 0.050799999999999956\n",
      "trajectorySteps 21\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [4 0 0 0 0]\n",
      " [7 1 2 0 0]\n",
      " [7 0 0 0 0]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬇️⏬⏩️🔄⬇️\n",
      "🔄⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0257 -0.0402 -0.0279 -0.0098 -0.0159]\n",
      " [-0.0542 -0.021  -0.0115 -0.0103 -0.0029]\n",
      " [-0.0291 -0.067   0.0329 -0.0107 -0.0024]\n",
      " [-0.0509  0.0743  0.2751  0.0131 -0.0056]\n",
      " [-0.0388 -0.0063  0.0696 -0.0008 -0.001 ]]\n",
      "mean_state_value 0.0013306463215467903\n",
      "episode 246/600\n",
      "p1 0.7976000000000002 p0 0.05059999999999996\n",
      "trajectorySteps 25\n",
      "[[ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [17  3  0  0  0]\n",
      " [ 0  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬇️⏬⏩️🔄⬇️\n",
      "⬆️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0256 -0.04   -0.0278 -0.0098 -0.0159]\n",
      " [-0.054  -0.0209 -0.0114 -0.0103 -0.0029]\n",
      " [-0.0291 -0.0673  0.033  -0.0106 -0.0024]\n",
      " [-0.0507  0.0753  0.277   0.0131 -0.0056]\n",
      " [-0.0386 -0.0062  0.0696 -0.0008 -0.001 ]]\n",
      "mean_state_value 0.0014860514625177484\n",
      "episode 247/600\n",
      "p1 0.7984000000000002 p0 0.05039999999999996\n",
      "trajectorySteps 115\n",
      "[[ 1  4  4  0  1]\n",
      " [17  1  1 15  4]\n",
      " [21  3  0 16 19]\n",
      " [ 1  0  2  1  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0256 -0.04   -0.0282 -0.0098 -0.0158]\n",
      " [-0.0544 -0.0209 -0.0114 -0.0102 -0.0029]\n",
      " [-0.0292 -0.0671  0.0331 -0.0111 -0.0025]\n",
      " [-0.0505  0.0755  0.2776  0.0131 -0.0056]\n",
      " [-0.0385 -0.0062  0.0707 -0.0007 -0.001 ]]\n",
      "mean_state_value 0.0015360718458259609\n",
      "episode 248/600\n",
      "p1 0.7992000000000001 p0 0.05019999999999995\n",
      "trajectorySteps 33\n",
      "[[ 1  2  1  1  0]\n",
      " [ 2  0  0  1  1]\n",
      " [14  1  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  3  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0256 -0.0399 -0.0281 -0.0097 -0.0157]\n",
      " [-0.0542 -0.0208 -0.0113 -0.0102 -0.0029]\n",
      " [-0.0292 -0.0669  0.0331 -0.0111 -0.0025]\n",
      " [-0.0504  0.0756  0.2795  0.0132 -0.0056]\n",
      " [-0.0384 -0.0062  0.0717 -0.0007 -0.001 ]]\n",
      "mean_state_value 0.0017101767225009008\n",
      "episode 249/600\n",
      "p1 0.8000000000000002 p0 0.049999999999999954\n",
      "trajectorySteps 26\n",
      "[[1 2 1 1 0]\n",
      " [2 0 0 1 1]\n",
      " [6 0 0 0 2]\n",
      " [3 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0255 -0.0398 -0.028  -0.0097 -0.0157]\n",
      " [-0.0541 -0.0207 -0.0113 -0.0102 -0.0029]\n",
      " [-0.0293 -0.0666  0.0332 -0.011  -0.0024]\n",
      " [-0.0502  0.0757  0.2813  0.0132 -0.0056]\n",
      " [-0.0383 -0.0061  0.0728 -0.0006 -0.001 ]]\n",
      "mean_state_value 0.001885323129503312\n",
      "episode 250/600\n",
      "p1 0.8008000000000002 p0 0.049799999999999955\n",
      "trajectorySteps 6\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0255 -0.0396 -0.0279 -0.0097 -0.0156]\n",
      " [-0.0539 -0.0207 -0.0112 -0.0101 -0.0029]\n",
      " [-0.0293 -0.0664  0.0332 -0.011  -0.0024]\n",
      " [-0.0505  0.0768  0.2832  0.0132 -0.0055]\n",
      " [-0.0382 -0.0061  0.0729 -0.0006 -0.001 ]]\n",
      "mean_state_value 0.002048127959529401\n",
      "episode 251/600\n",
      "p1 0.8016000000000002 p0 0.04959999999999996\n",
      "trajectorySteps 73\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [15  0  0  0  0]\n",
      " [27  1  2  0  0]\n",
      " [28  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0254 -0.0395 -0.0278 -0.0096 -0.0156]\n",
      " [-0.0537 -0.0206 -0.0112 -0.0101 -0.0029]\n",
      " [-0.0293 -0.0661  0.0333 -0.0109 -0.0024]\n",
      " [-0.0509  0.0779  0.2839  0.0132 -0.0055]\n",
      " [-0.039  -0.0061  0.0729 -0.0006 -0.001 ]]\n",
      "mean_state_value 0.0021157753467068497\n",
      "episode 252/600\n",
      "p1 0.8024000000000002 p0 0.04939999999999996\n",
      "trajectorySteps 34\n",
      "[[0 1 2 4 0]\n",
      " [0 1 0 3 1]\n",
      " [8 6 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "⬇️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0254 -0.0393 -0.0277 -0.0096 -0.0155]\n",
      " [-0.0535 -0.0205 -0.0112 -0.01   -0.0029]\n",
      " [-0.0293 -0.0664  0.0334 -0.0109 -0.0024]\n",
      " [-0.0508  0.078   0.2857  0.0132 -0.0055]\n",
      " [-0.0389 -0.006   0.074  -0.0005 -0.001 ]]\n",
      "mean_state_value 0.002276470133199564\n",
      "episode 253/600\n",
      "p1 0.8032000000000001 p0 0.04919999999999995\n",
      "trajectorySteps 39\n",
      "[[ 1  1  1  1  0]\n",
      " [ 1  0  0  1  1]\n",
      " [12  0  0  0  3]\n",
      " [10  0  2  0  1]\n",
      " [ 1  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "🔄🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0253 -0.0392 -0.0276 -0.0096 -0.0154]\n",
      " [-0.0533 -0.0205 -0.0111 -0.01   -0.0029]\n",
      " [-0.0294 -0.0662  0.0334 -0.0108 -0.0025]\n",
      " [-0.0507  0.0781  0.2876  0.0132 -0.0055]\n",
      " [-0.0388 -0.006   0.075  -0.0005 -0.001 ]]\n",
      "mean_state_value 0.002443931537682884\n",
      "episode 254/600\n",
      "p1 0.8040000000000002 p0 0.04899999999999995\n",
      "trajectorySteps 37\n",
      "[[ 0  0  1  3  0]\n",
      " [ 0  2  1  2  1]\n",
      " [ 1 21  0  0  1]\n",
      " [ 0  0  2  1  1]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬⬆️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0253 -0.0391 -0.0275 -0.0096 -0.0154]\n",
      " [-0.0531 -0.0209 -0.0111 -0.01   -0.0028]\n",
      " [-0.0294 -0.067   0.0335 -0.0108 -0.0025]\n",
      " [-0.0506  0.0782  0.2895  0.0143 -0.0059]\n",
      " [-0.0387 -0.006   0.0751 -0.0005 -0.001 ]]\n",
      "mean_state_value 0.0025458476643506717\n",
      "episode 255/600\n",
      "p1 0.8048000000000002 p0 0.048799999999999955\n",
      "trajectorySteps 33\n",
      "[[3 2 1 1 0]\n",
      " [2 1 0 2 1]\n",
      " [6 1 0 1 3]\n",
      " [0 0 2 0 2]\n",
      " [0 0 1 1 3]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️🔄\n",
      "🔄⬅️⏬⬆️🔄\n",
      "⬆️⏩️✅⏪🔄\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0253 -0.0394 -0.0274 -0.0096 -0.0153]\n",
      " [-0.0529 -0.0208 -0.011  -0.0099 -0.0028]\n",
      " [-0.0294 -0.0667  0.0335 -0.0108 -0.0025]\n",
      " [-0.0504  0.0784  0.2913  0.0143 -0.0059]\n",
      " [-0.0385 -0.006   0.0762 -0.0004 -0.001 ]]\n",
      "mean_state_value 0.002705971295463252\n",
      "episode 256/600\n",
      "p1 0.8056000000000002 p0 0.048599999999999956\n",
      "trajectorySteps 85\n",
      "[[ 2  1  4  3  5]\n",
      " [ 1  0  0  3 27]\n",
      " [ 1  0  0  1  8]\n",
      " [ 0  0  2  1 26]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️🔄⬅️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0253 -0.0392 -0.0274 -0.0096 -0.0153]\n",
      " [-0.0528 -0.0207 -0.011  -0.0099 -0.0029]\n",
      " [-0.0293 -0.0665  0.0336 -0.0107 -0.0025]\n",
      " [-0.0502  0.0785  0.292   0.0153 -0.0065]\n",
      " [-0.0384 -0.0059  0.0763 -0.0004 -0.001 ]]\n",
      "mean_state_value 0.0028086591813068808\n",
      "episode 257/600\n",
      "p1 0.8064000000000002 p0 0.04839999999999996\n",
      "trajectorySteps 131\n",
      "[[ 2 42  3  0  0]\n",
      " [ 1  4  1  1  2]\n",
      " [48  2  0  1 12]\n",
      " [ 6  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0253 -0.0411 -0.0277 -0.0095 -0.0153]\n",
      " [-0.0526 -0.0208 -0.0109 -0.0099 -0.0029]\n",
      " [-0.0297 -0.0663  0.0337 -0.0107 -0.0026]\n",
      " [-0.0502  0.0786  0.2939  0.0153 -0.0064]\n",
      " [-0.0383 -0.0059  0.0773 -0.0003 -0.0009]]\n",
      "mean_state_value 0.0028655101514852926\n",
      "episode 258/600\n",
      "p1 0.8072000000000001 p0 0.04819999999999995\n",
      "trajectorySteps 24\n",
      "[[0 1 0 0 0]\n",
      " [0 2 0 0 0]\n",
      " [9 9 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0253 -0.0414 -0.0276 -0.0095 -0.0152]\n",
      " [-0.0524 -0.0207 -0.0109 -0.0098 -0.0029]\n",
      " [-0.0297 -0.0671  0.0347 -0.0106 -0.0025]\n",
      " [-0.05    0.0787  0.2958  0.0154 -0.0064]\n",
      " [-0.0382 -0.0059  0.0774 -0.0003 -0.0009]]\n",
      "mean_state_value 0.0029890826079964575\n",
      "episode 259/600\n",
      "p1 0.8080000000000002 p0 0.04799999999999995\n",
      "trajectorySteps 34\n",
      "[[1 3 2 1 0]\n",
      " [1 0 0 1 1]\n",
      " [9 1 0 0 1]\n",
      " [7 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.520e-02 -4.130e-02 -2.750e-02 -9.500e-03 -1.520e-02]\n",
      " [-5.220e-02 -2.060e-02 -1.090e-02 -9.800e-03 -2.900e-03]\n",
      " [-2.970e-02 -6.680e-02  3.480e-02 -1.060e-02 -2.500e-03]\n",
      " [-4.990e-02  7.890e-02  2.977e-01  1.540e-02 -6.400e-03]\n",
      " [-3.810e-02 -5.800e-03  7.850e-02 -2.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.003162881489254233\n",
      "episode 260/600\n",
      "p1 0.8088000000000002 p0 0.047799999999999954\n",
      "trajectorySteps 70\n",
      "[[ 1  2  1  1  0]\n",
      " [ 1  0  0  1  6]\n",
      " [ 1  0  0  1 48]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  2  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.520e-02 -4.120e-02 -2.740e-02 -9.400e-03 -1.510e-02]\n",
      " [-5.200e-02 -2.060e-02 -1.080e-02 -9.700e-03 -2.900e-03]\n",
      " [-2.970e-02 -6.660e-02  3.490e-02 -1.060e-02 -2.600e-03]\n",
      " [-4.980e-02  7.900e-02  2.996e-01  1.540e-02 -6.400e-03]\n",
      " [-3.790e-02 -5.800e-03  7.960e-02 -2.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.0033412327160343876\n",
      "episode 261/600\n",
      "p1 0.8096000000000002 p0 0.047599999999999955\n",
      "trajectorySteps 37\n",
      "[[ 1  1  1  2  0]\n",
      " [ 2  2  0  2  1]\n",
      " [15  0  0  0  1]\n",
      " [ 3  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.520e-02 -4.110e-02 -2.730e-02 -9.400e-03 -1.500e-02]\n",
      " [-5.230e-02 -2.100e-02 -1.080e-02 -9.700e-03 -2.900e-03]\n",
      " [-2.970e-02 -6.630e-02  3.490e-02 -1.050e-02 -2.600e-03]\n",
      " [-4.960e-02  7.910e-02  3.014e-01  1.540e-02 -6.300e-03]\n",
      " [-3.780e-02 -5.800e-03  8.060e-02 -1.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.0034819547314724362\n",
      "episode 262/600\n",
      "p1 0.8104000000000002 p0 0.047399999999999956\n",
      "trajectorySteps 119\n",
      "[[ 1  1  2  1  0]\n",
      " [ 1  0  0  1 26]\n",
      " [50  1  0  2 25]\n",
      " [ 2  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0251 -0.041  -0.0273 -0.0094 -0.015 ]\n",
      " [-0.0521 -0.0209 -0.0107 -0.0097 -0.003 ]\n",
      " [-0.0299 -0.0661  0.035  -0.0105 -0.0026]\n",
      " [-0.0495  0.0793  0.3033  0.0154 -0.0063]\n",
      " [-0.0377 -0.0057  0.0817 -0.     -0.0009]]\n",
      "mean_state_value 0.0036521783025052863\n",
      "episode 263/600\n",
      "p1 0.8112000000000001 p0 0.04719999999999995\n",
      "trajectorySteps 78\n",
      "[[ 1  1  2  3  0]\n",
      " [ 2  0  0 28 27]\n",
      " [ 2  0  0  1  1]\n",
      " [ 0  0  2  0  4]\n",
      " [ 0  0  1  1  2]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️⬇️🔄\n",
      "⬆️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.510e-02 -4.080e-02 -2.720e-02 -9.400e-03 -1.490e-02]\n",
      " [-5.200e-02 -2.080e-02 -1.070e-02 -9.700e-03 -3.100e-03]\n",
      " [-2.990e-02 -6.580e-02  3.500e-02 -1.040e-02 -2.600e-03]\n",
      " [-4.930e-02  7.940e-02  3.052e-01  1.550e-02 -6.300e-03]\n",
      " [-3.760e-02 -5.700e-03  8.280e-02  1.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.0038283815070419857\n",
      "episode 264/600\n",
      "p1 0.8120000000000002 p0 0.04699999999999995\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 0]\n",
      " [2 0 0 1 0]\n",
      " [3 0 0 1 0]\n",
      " [0 0 2 1 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️⬇️🔄\n",
      "⬆️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.510e-02 -4.070e-02 -2.710e-02 -9.400e-03 -1.480e-02]\n",
      " [-5.180e-02 -2.080e-02 -1.060e-02 -9.600e-03 -3.100e-03]\n",
      " [-2.990e-02 -6.560e-02  3.510e-02 -1.080e-02 -2.600e-03]\n",
      " [-4.910e-02  7.950e-02  3.071e-01  1.650e-02 -6.300e-03]\n",
      " [-3.740e-02 -5.700e-03  8.290e-02  1.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.003995201162869742\n",
      "episode 265/600\n",
      "p1 0.8128000000000002 p0 0.04679999999999995\n",
      "trajectorySteps 38\n",
      "[[1 1 2 3 0]\n",
      " [1 0 0 9 0]\n",
      " [2 0 1 9 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 2 1 1]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.50e-02 -4.05e-02 -2.70e-02 -9.30e-03 -1.48e-02]\n",
      " [-5.16e-02 -2.07e-02 -1.06e-02 -9.60e-03 -3.10e-03]\n",
      " [-2.99e-02 -6.53e-02  3.52e-02 -1.13e-02 -2.60e-03]\n",
      " [-4.90e-02  7.96e-02  3.09e-01  1.65e-02 -6.30e-03]\n",
      " [-3.73e-02 -5.70e-03  8.40e-02  2.00e-04 -9.00e-04]]\n",
      "mean_state_value 0.0041621757942103925\n",
      "episode 266/600\n",
      "p1 0.8136000000000002 p0 0.046599999999999954\n",
      "trajectorySteps 33\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [14  1  2  0  0]\n",
      " [15  0  0  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.500e-02 -4.040e-02 -2.690e-02 -9.300e-03 -1.470e-02]\n",
      " [-5.140e-02 -2.060e-02 -1.060e-02 -9.600e-03 -3.100e-03]\n",
      " [-2.980e-02 -6.510e-02  3.520e-02 -1.120e-02 -2.600e-03]\n",
      " [-4.930e-02  8.070e-02  3.109e-01  1.660e-02 -6.300e-03]\n",
      " [-3.760e-02 -5.600e-03  8.410e-02  2.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.004317163412299922\n",
      "episode 267/600\n",
      "p1 0.8144000000000002 p0 0.046399999999999955\n",
      "trajectorySteps 172\n",
      "[[153   1   1   1   0]\n",
      " [  5   0   0   1   1]\n",
      " [  2   0   0   0   1]\n",
      " [  0   0   2   0   1]\n",
      " [  0   0   1   1   1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.560e-02 -4.020e-02 -2.680e-02 -9.300e-03 -1.470e-02]\n",
      " [-5.130e-02 -2.050e-02 -1.050e-02 -9.500e-03 -3.000e-03]\n",
      " [-2.980e-02 -6.480e-02  3.530e-02 -1.120e-02 -2.600e-03]\n",
      " [-4.910e-02  8.090e-02  3.129e-01  1.660e-02 -6.200e-03]\n",
      " [-3.740e-02 -5.600e-03  8.520e-02  2.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.004479629047943929\n",
      "episode 268/600\n",
      "p1 0.8152000000000001 p0 0.04619999999999995\n",
      "trajectorySteps 36\n",
      "[[2 1 1 2 0]\n",
      " [4 0 1 3 9]\n",
      " [6 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.560e-02 -4.010e-02 -2.670e-02 -9.200e-03 -1.460e-02]\n",
      " [-5.110e-02 -2.050e-02 -1.050e-02 -9.900e-03 -3.000e-03]\n",
      " [-2.980e-02 -6.460e-02  3.530e-02 -1.110e-02 -2.600e-03]\n",
      " [-4.890e-02  8.100e-02  3.136e-01  1.660e-02 -6.200e-03]\n",
      " [-3.730e-02 -5.600e-03  8.620e-02  3.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.004597764324005548\n",
      "episode 269/600\n",
      "p1 0.8160000000000002 p0 0.04599999999999995\n",
      "trajectorySteps 25\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 9  0  0  0  0]\n",
      " [10  2  2  0  0]\n",
      " [ 2  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏬⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.550e-02 -3.990e-02 -2.660e-02 -9.200e-03 -1.450e-02]\n",
      " [-5.090e-02 -2.040e-02 -1.040e-02 -9.900e-03 -3.000e-03]\n",
      " [-2.990e-02 -6.430e-02  3.540e-02 -1.110e-02 -2.500e-03]\n",
      " [-4.930e-02  8.170e-02  3.143e-01  1.660e-02 -6.200e-03]\n",
      " [-3.720e-02 -5.500e-03  8.630e-02  3.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.004691979357074862\n",
      "episode 270/600\n",
      "p1 0.8168000000000002 p0 0.04579999999999995\n",
      "trajectorySteps 50\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  1  0  0  0]\n",
      " [25 20  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.540e-02 -3.980e-02 -2.650e-02 -9.200e-03 -1.450e-02]\n",
      " [-5.070e-02 -2.030e-02 -1.040e-02 -9.900e-03 -3.000e-03]\n",
      " [-3.000e-02 -6.530e-02  3.550e-02 -1.100e-02 -2.500e-03]\n",
      " [-4.920e-02  8.280e-02  3.162e-01  1.660e-02 -6.200e-03]\n",
      " [-3.710e-02 -5.500e-03  8.640e-02  3.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.004822575605440359\n",
      "episode 271/600\n",
      "p1 0.8176000000000002 p0 0.04559999999999995\n",
      "trajectorySteps 32\n",
      "[[ 1  1  1  1  0]\n",
      " [17  0  0  1  1]\n",
      " [ 1  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  2  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0254 -0.0396 -0.0264 -0.0091 -0.0144]\n",
      " [-0.0506 -0.0202 -0.0103 -0.0098 -0.003 ]\n",
      " [-0.03   -0.065   0.0355 -0.011  -0.0025]\n",
      " [-0.049   0.0829  0.3182  0.0166 -0.0061]\n",
      " [-0.037  -0.0055  0.0875  0.0004 -0.0009]]\n",
      "mean_state_value 0.005006858868293095\n",
      "episode 272/600\n",
      "p1 0.8184000000000002 p0 0.045399999999999954\n",
      "trajectorySteps 98\n",
      "[[ 1  1  1  1  0]\n",
      " [38  3  0  1  1]\n",
      " [38  0  0  0  1]\n",
      " [ 4  0  2  0  1]\n",
      " [ 0  0  2  2  1]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️🔄\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0254 -0.0395 -0.0263 -0.0091 -0.0143]\n",
      " [-0.0518 -0.0202 -0.0103 -0.0098 -0.003 ]\n",
      " [-0.0302 -0.0648  0.0356 -0.0109 -0.0025]\n",
      " [-0.0489  0.083   0.3201  0.0167 -0.0061]\n",
      " [-0.0369 -0.0054  0.0885  0.0005 -0.0009]]\n",
      "mean_state_value 0.005127791498063824\n",
      "episode 273/600\n",
      "p1 0.8192000000000002 p0 0.04519999999999995\n",
      "trajectorySteps 55\n",
      "[[ 7  6  1  2  5]\n",
      " [ 1  0  0  1 24]\n",
      " [ 1  0  0  0  1]\n",
      " [ 0  0  2  1  1]\n",
      " [ 0  0  0  1  1]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0255 -0.0394 -0.0262 -0.0091 -0.0143]\n",
      " [-0.0516 -0.0201 -0.0103 -0.0097 -0.0031]\n",
      " [-0.0301 -0.0645  0.0357 -0.0109 -0.0025]\n",
      " [-0.0487  0.0832  0.322   0.0177 -0.0061]\n",
      " [-0.0367 -0.0054  0.0887  0.     -0.0009]]\n",
      "mean_state_value 0.005289226588906387\n",
      "episode 274/600\n",
      "p1 0.8200000000000002 p0 0.04499999999999995\n",
      "trajectorySteps 32\n",
      "[[7 1 0 0 1]\n",
      " [7 1 1 1 2]\n",
      " [3 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.54e-02 -3.96e-02 -2.61e-02 -9.10e-03 -1.42e-02]\n",
      " [-5.15e-02 -2.05e-02 -1.02e-02 -9.70e-03 -3.10e-03]\n",
      " [-3.01e-02 -6.43e-02  3.57e-02 -1.08e-02 -2.50e-03]\n",
      " [-4.85e-02  8.33e-02  3.24e-01  1.78e-02 -6.00e-03]\n",
      " [-3.66e-02 -5.40e-03  8.98e-02  1.00e-04 -9.00e-04]]\n",
      "mean_state_value 0.005439015975053564\n",
      "episode 275/600\n",
      "p1 0.8208000000000002 p0 0.04479999999999995\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [7 2 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.540e-02 -3.950e-02 -2.590e-02 -9.000e-03 -1.420e-02]\n",
      " [-5.130e-02 -2.040e-02 -1.020e-02 -9.700e-03 -3.100e-03]\n",
      " [-3.010e-02 -6.440e-02  3.680e-02 -1.080e-02 -2.500e-03]\n",
      " [-4.840e-02  8.340e-02  3.259e-01  1.780e-02 -6.000e-03]\n",
      " [-3.650e-02 -5.400e-03  8.990e-02  1.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.005613169512037225\n",
      "episode 276/600\n",
      "p1 0.8216000000000002 p0 0.04459999999999995\n",
      "trajectorySteps 47\n",
      "[[ 2  2  1  1  0]\n",
      " [ 1  0  0  1  1]\n",
      " [16  1  0  0  1]\n",
      " [12  0  2  0  2]\n",
      " [ 0  0  1  1  2]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.540e-02 -3.930e-02 -2.580e-02 -9.000e-03 -1.410e-02]\n",
      " [-5.110e-02 -2.030e-02 -1.010e-02 -9.600e-03 -3.100e-03]\n",
      " [-3.020e-02 -6.420e-02  3.690e-02 -1.070e-02 -2.500e-03]\n",
      " [-4.820e-02  8.360e-02  3.267e-01  1.780e-02 -6.000e-03]\n",
      " [-3.640e-02 -5.300e-03  9.100e-02  2.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.005753077570959439\n",
      "episode 277/600\n",
      "p1 0.8224000000000002 p0 0.04439999999999995\n",
      "trajectorySteps 98\n",
      "[[ 1  3  4  2  2]\n",
      " [ 1  0  0  5 32]\n",
      " [11  1  1  3 30]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️🔄\n",
      "🔄⬅️⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.530e-02 -3.920e-02 -2.570e-02 -9.000e-03 -1.400e-02]\n",
      " [-5.090e-02 -2.020e-02 -1.010e-02 -9.600e-03 -3.100e-03]\n",
      " [-3.020e-02 -6.400e-02  3.800e-02 -1.110e-02 -2.600e-03]\n",
      " [-4.800e-02  8.370e-02  3.286e-01  1.780e-02 -6.000e-03]\n",
      " [-3.630e-02 -5.300e-03  9.110e-02  2.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.0059143450941227405\n",
      "episode 278/600\n",
      "p1 0.8232000000000002 p0 0.04419999999999995\n",
      "trajectorySteps 13\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [9 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️🔄\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.530e-02 -3.910e-02 -2.560e-02 -9.000e-03 -1.400e-02]\n",
      " [-5.070e-02 -2.010e-02 -1.000e-02 -9.500e-03 -3.100e-03]\n",
      " [-3.020e-02 -6.370e-02  3.810e-02 -1.110e-02 -2.600e-03]\n",
      " [-4.840e-02  8.480e-02  3.306e-01  1.780e-02 -5.900e-03]\n",
      " [-3.610e-02 -5.300e-03  9.120e-02  2.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.006086524006426427\n",
      "episode 279/600\n",
      "p1 0.8240000000000002 p0 0.04399999999999995\n",
      "trajectorySteps 114\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [29  1  0  0  0]\n",
      " [ 8  0  2  0  0]\n",
      " [72  1  1  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️🔄\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.520e-02 -3.890e-02 -2.550e-02 -8.900e-03 -1.390e-02]\n",
      " [-5.050e-02 -2.010e-02 -1.000e-02 -9.500e-03 -3.100e-03]\n",
      " [-3.040e-02 -6.350e-02  3.810e-02 -1.100e-02 -2.600e-03]\n",
      " [-4.830e-02  8.500e-02  3.325e-01  1.790e-02 -5.900e-03]\n",
      " [-3.720e-02 -5.200e-03  9.230e-02  2.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.006216052139255943\n",
      "episode 280/600\n",
      "p1 0.8248000000000002 p0 0.04379999999999995\n",
      "trajectorySteps 139\n",
      "[[27 24  1  1  0]\n",
      " [ 3  1  0  3 14]\n",
      " [27  2  0  0  2]\n",
      " [27  0  2  1  2]\n",
      " [ 0  0  0  1  1]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.540e-02 -3.920e-02 -2.540e-02 -8.900e-03 -1.390e-02]\n",
      " [-5.040e-02 -2.000e-02 -1.000e-02 -9.500e-03 -3.100e-03]\n",
      " [-3.040e-02 -6.320e-02  3.820e-02 -1.100e-02 -2.600e-03]\n",
      " [-4.830e-02  8.510e-02  3.344e-01  1.900e-02 -5.900e-03]\n",
      " [-3.700e-02 -5.100e-03  9.240e-02 -2.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.00635651378294157\n",
      "episode 281/600\n",
      "p1 0.8256000000000002 p0 0.04359999999999995\n",
      "trajectorySteps 13\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [9 1 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.530e-02 -3.900e-02 -2.530e-02 -8.900e-03 -1.380e-02]\n",
      " [-5.020e-02 -1.990e-02 -9.900e-03 -9.400e-03 -3.000e-03]\n",
      " [-3.050e-02 -6.340e-02  3.830e-02 -1.090e-02 -2.600e-03]\n",
      " [-4.810e-02  8.620e-02  3.364e-01  1.900e-02 -5.900e-03]\n",
      " [-3.690e-02 -5.100e-03  9.250e-02 -2.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.006530055853330231\n",
      "episode 282/600\n",
      "p1 0.8264000000000002 p0 0.04339999999999995\n",
      "trajectorySteps 16\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [6 0 2 0 0]\n",
      " [5 1 1 0 0]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.530e-02 -3.890e-02 -2.520e-02 -8.800e-03 -1.370e-02]\n",
      " [-5.000e-02 -1.990e-02 -9.900e-03 -9.400e-03 -3.000e-03]\n",
      " [-3.040e-02 -6.310e-02  3.830e-02 -1.090e-02 -2.500e-03]\n",
      " [-4.810e-02  8.640e-02  3.372e-01  1.900e-02 -5.800e-03]\n",
      " [-3.730e-02 -5.000e-03  9.360e-02 -1.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.006652953226048284\n",
      "episode 283/600\n",
      "p1 0.8272000000000002 p0 0.04319999999999995\n",
      "trajectorySteps 40\n",
      "[[10  1  3  1  2]\n",
      " [10  0  0  0  1]\n",
      " [ 3  0  0  0  1]\n",
      " [ 0  0  2  0  2]\n",
      " [ 0  0  1  1  2]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.550e-02 -3.870e-02 -2.520e-02 -8.800e-03 -1.370e-02]\n",
      " [-4.990e-02 -1.980e-02 -9.800e-03 -9.300e-03 -3.000e-03]\n",
      " [-3.040e-02 -6.290e-02  3.840e-02 -1.080e-02 -2.500e-03]\n",
      " [-4.790e-02  8.650e-02  3.392e-01  1.900e-02 -5.800e-03]\n",
      " [-3.720e-02 -5.000e-03  9.470e-02 -1.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.006827089436624813\n",
      "episode 284/600\n",
      "p1 0.8280000000000002 p0 0.04299999999999995\n",
      "trajectorySteps 47\n",
      "[[ 9  1  2  1  0]\n",
      " [10  1  0  1  1]\n",
      " [ 9  2  0  0  2]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  2  2]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0256 -0.0386 -0.0251 -0.0088 -0.0136]\n",
      " [-0.0502 -0.0197 -0.0098 -0.0093 -0.003 ]\n",
      " [-0.0304 -0.0626  0.0384 -0.0108 -0.0026]\n",
      " [-0.0477  0.0866  0.3411  0.019  -0.0058]\n",
      " [-0.0371 -0.0049  0.0959  0.     -0.0009]]\n",
      "mean_state_value 0.006991007189938476\n",
      "episode 285/600\n",
      "p1 0.8288000000000002 p0 0.04279999999999995\n",
      "trajectorySteps 29\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [20  1  0  0  0]\n",
      " [ 3  1  2  0  0]\n",
      " [ 2  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.550e-02 -3.840e-02 -2.490e-02 -8.700e-03 -1.360e-02]\n",
      " [-5.000e-02 -1.960e-02 -9.700e-03 -9.300e-03 -3.000e-03]\n",
      " [-3.050e-02 -6.240e-02  3.850e-02 -1.070e-02 -2.600e-03]\n",
      " [-4.790e-02  8.780e-02  3.419e-01  1.910e-02 -5.800e-03]\n",
      " [-3.700e-02 -4.900e-03  9.600e-02  1.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.007113443778657647\n",
      "episode 286/600\n",
      "p1 0.8296000000000002 p0 0.04259999999999995\n",
      "trajectorySteps 44\n",
      "[[ 1  2  2  1  0]\n",
      " [ 1  0  0  1  1]\n",
      " [17  1  0  1  4]\n",
      " [ 0  0  2  0  2]\n",
      " [ 0  1  2  2  3]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.550e-02 -3.830e-02 -2.480e-02 -8.700e-03 -1.350e-02]\n",
      " [-4.980e-02 -1.960e-02 -9.700e-03 -9.200e-03 -3.000e-03]\n",
      " [-3.060e-02 -6.210e-02  3.860e-02 -1.070e-02 -2.600e-03]\n",
      " [-4.770e-02  8.790e-02  3.439e-01  1.910e-02 -5.700e-03]\n",
      " [-3.690e-02 -4.800e-03  9.670e-02  1.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.007286454463844755\n",
      "episode 287/600\n",
      "p1 0.8304000000000002 p0 0.04239999999999995\n",
      "trajectorySteps 40\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [13  0  0  0  0]\n",
      " [20  1  2  0  0]\n",
      " [ 1  2  1  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.540e-02 -3.810e-02 -2.470e-02 -8.600e-03 -1.350e-02]\n",
      " [-4.960e-02 -1.950e-02 -9.600e-03 -9.200e-03 -3.000e-03]\n",
      " [-3.060e-02 -6.190e-02  3.860e-02 -1.060e-02 -2.600e-03]\n",
      " [-4.760e-02  8.760e-02  3.459e-01  1.910e-02 -5.700e-03]\n",
      " [-3.720e-02 -5.100e-03  9.780e-02  2.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.0074307569584514035\n",
      "episode 288/600\n",
      "p1 0.8312000000000002 p0 0.042199999999999946\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [6 3 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.540e-02 -3.800e-02 -2.460e-02 -8.600e-03 -1.340e-02]\n",
      " [-4.940e-02 -1.940e-02 -9.600e-03 -9.100e-03 -3.000e-03]\n",
      " [-3.060e-02 -6.210e-02  3.870e-02 -1.060e-02 -2.600e-03]\n",
      " [-4.740e-02  8.880e-02  3.479e-01  1.910e-02 -5.700e-03]\n",
      " [-3.700e-02 -5.100e-03  9.790e-02  2.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.0076054931101831105\n",
      "episode 289/600\n",
      "p1 0.8320000000000002 p0 0.04199999999999995\n",
      "trajectorySteps 14\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 1 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.530e-02 -3.780e-02 -2.450e-02 -8.600e-03 -1.330e-02]\n",
      " [-4.920e-02 -1.930e-02 -9.600e-03 -9.100e-03 -3.000e-03]\n",
      " [-3.060e-02 -6.180e-02  3.880e-02 -1.100e-02 -2.600e-03]\n",
      " [-4.720e-02  8.890e-02  3.498e-01  2.030e-02 -5.700e-03]\n",
      " [-3.690e-02 -5.000e-03  9.800e-02  2.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.007783876700486768\n",
      "episode 290/600\n",
      "p1 0.8328000000000002 p0 0.04179999999999995\n",
      "trajectorySteps 48\n",
      "[[ 1  1  1  1  0]\n",
      " [ 1  0  0  1  2]\n",
      " [ 1  0  0  2 30]\n",
      " [ 0  0  2  0  2]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️🔄⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.530e-02 -3.770e-02 -2.440e-02 -8.500e-03 -1.330e-02]\n",
      " [-4.910e-02 -1.920e-02 -9.500e-03 -9.100e-03 -2.900e-03]\n",
      " [-3.060e-02 -6.150e-02  3.880e-02 -1.090e-02 -2.600e-03]\n",
      " [-4.710e-02  8.910e-02  3.518e-01  2.030e-02 -5.700e-03]\n",
      " [-3.680e-02 -5.000e-03  9.920e-02  3.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.007976516586807668\n",
      "episode 291/600\n",
      "p1 0.8336000000000002 p0 0.04159999999999995\n",
      "trajectorySteps 6\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️🔄⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.520e-02 -3.750e-02 -2.430e-02 -8.500e-03 -1.320e-02]\n",
      " [-4.890e-02 -1.920e-02 -9.500e-03 -9.000e-03 -2.900e-03]\n",
      " [-3.060e-02 -6.130e-02  3.890e-02 -1.090e-02 -2.600e-03]\n",
      " [-4.730e-02  9.020e-02  3.538e-01  2.030e-02 -5.600e-03]\n",
      " [-3.660e-02 -5.000e-03  9.930e-02  3.000e-04 -9.000e-04]]\n",
      "mean_state_value 0.008157302659417285\n",
      "episode 292/600\n",
      "p1 0.8344000000000003 p0 0.04139999999999995\n",
      "trajectorySteps 44\n",
      "[[ 2  2  3  3  0]\n",
      " [ 3  1  1 16  1]\n",
      " [ 3  1  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  2  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️🔄\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0252 -0.0374 -0.0242 -0.0085 -0.0131]\n",
      " [-0.0487 -0.0191 -0.0098 -0.0094 -0.0029]\n",
      " [-0.0305 -0.061   0.039  -0.0108 -0.0026]\n",
      " [-0.0471  0.0904  0.3558  0.0203 -0.0056]\n",
      " [-0.0365 -0.0049  0.1004  0.0004 -0.0009]]\n",
      "mean_state_value 0.008317500517234793\n",
      "episode 293/600\n",
      "p1 0.8352000000000002 p0 0.041199999999999945\n",
      "trajectorySteps 28\n",
      "[[ 1  1  1  0  0]\n",
      " [ 1  0  1  1 13]\n",
      " [ 2  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0252 -0.0372 -0.0245 -0.0085 -0.0131]\n",
      " [-0.0485 -0.019  -0.0098 -0.0094 -0.003 ]\n",
      " [-0.0305 -0.0608  0.039  -0.0108 -0.0026]\n",
      " [-0.0469  0.0905  0.3578  0.0203 -0.0056]\n",
      " [-0.0364 -0.0049  0.1016  0.0005 -0.0008]]\n",
      "mean_state_value 0.00849578961835639\n",
      "episode 294/600\n",
      "p1 0.8360000000000002 p0 0.040999999999999946\n",
      "trajectorySteps 260\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [64  3  0  0  0]\n",
      " [97  0  2  0  0]\n",
      " [91  1  1  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0251 -0.0371 -0.0244 -0.0084 -0.013 ]\n",
      " [-0.0483 -0.019  -0.0098 -0.0093 -0.003 ]\n",
      " [-0.0307 -0.0606  0.0391 -0.0107 -0.0026]\n",
      " [-0.0472  0.0906  0.3598  0.0204 -0.0056]\n",
      " [-0.0377 -0.0048  0.1027  0.0005 -0.0008]]\n",
      "mean_state_value 0.008609359254306271\n",
      "episode 295/600\n",
      "p1 0.8368000000000002 p0 0.04079999999999995\n",
      "trajectorySteps 24\n",
      "[[2 2 1 1 0]\n",
      " [1 0 0 2 2]\n",
      " [3 0 0 0 3]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 2]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.037  -0.0243 -0.0084 -0.0129]\n",
      " [-0.0481 -0.0189 -0.0097 -0.0093 -0.003 ]\n",
      " [-0.0306 -0.0603  0.0391 -0.0107 -0.0026]\n",
      " [-0.047   0.0908  0.3618  0.0204 -0.0055]\n",
      " [-0.0375 -0.0048  0.1039  0.0006 -0.0008]]\n",
      "mean_state_value 0.008804864096271882\n",
      "episode 296/600\n",
      "p1 0.8376000000000002 p0 0.04059999999999995\n",
      "trajectorySteps 28\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [12 11  0  0  0]\n",
      " [ 2  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.0368 -0.0242 -0.0084 -0.0129]\n",
      " [-0.0479 -0.0188 -0.0097 -0.0092 -0.003 ]\n",
      " [-0.0306 -0.0605  0.0392 -0.0106 -0.0026]\n",
      " [-0.0468  0.092   0.3638  0.0204 -0.0055]\n",
      " [-0.0374 -0.0047  0.104   0.0006 -0.0008]]\n",
      "mean_state_value 0.00898241409369446\n",
      "episode 297/600\n",
      "p1 0.8384000000000003 p0 0.04039999999999995\n",
      "trajectorySteps 48\n",
      "[[17  0  0  0  0]\n",
      " [17  1  1  1  1]\n",
      " [ 3  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.0366 -0.0241 -0.0083 -0.0128]\n",
      " [-0.0481 -0.0191 -0.0096 -0.0092 -0.003 ]\n",
      " [-0.0306 -0.0602  0.0393 -0.0106 -0.0026]\n",
      " [-0.0467  0.0921  0.3647  0.0204 -0.0055]\n",
      " [-0.0373 -0.0047  0.1052  0.0007 -0.0008]]\n",
      "mean_state_value 0.00909915542555568\n",
      "episode 298/600\n",
      "p1 0.8392000000000002 p0 0.040199999999999944\n",
      "trajectorySteps 10\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 6 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.0365 -0.024  -0.0083 -0.0128]\n",
      " [-0.0479 -0.019  -0.0096 -0.0091 -0.0029]\n",
      " [-0.0306 -0.0605  0.0404 -0.0105 -0.0026]\n",
      " [-0.0465  0.0922  0.3655  0.0204 -0.0055]\n",
      " [-0.0372 -0.0047  0.1053  0.0007 -0.0008]]\n",
      "mean_state_value 0.009235871881731757\n",
      "episode 299/600\n",
      "p1 0.8400000000000002 p0 0.039999999999999945\n",
      "trajectorySteps 61\n",
      "[[ 2  2  1  1  0]\n",
      " [21  1  0  1  1]\n",
      " [22  2  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.0367 -0.0239 -0.0082 -0.0127]\n",
      " [-0.048  -0.019  -0.0095 -0.0091 -0.0029]\n",
      " [-0.0306 -0.0602  0.0405 -0.0104 -0.0026]\n",
      " [-0.0463  0.0924  0.3675  0.0205 -0.0054]\n",
      " [-0.037  -0.0046  0.1065  0.0008 -0.0008]]\n",
      "mean_state_value 0.009405665299485506\n",
      "episode 300/600\n",
      "p1 0.8408000000000002 p0 0.03979999999999995\n",
      "trajectorySteps 75\n",
      "[[ 0  1  1  0  0]\n",
      " [ 8  1  1  1  1]\n",
      " [29  1  0  0  1]\n",
      " [24  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "🔄⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0249 -0.0366 -0.0241 -0.0082 -0.0126]\n",
      " [-0.0482 -0.0189 -0.0095 -0.0091 -0.0029]\n",
      " [-0.0309 -0.06    0.0406 -0.0104 -0.0026]\n",
      " [-0.0464  0.0925  0.3688  0.0205 -0.0054]\n",
      " [-0.0369 -0.0046  0.1076  0.0009 -0.0008]]\n",
      "mean_state_value 0.009520735876313134\n",
      "episode 301/600\n",
      "p1 0.8416000000000002 p0 0.03959999999999995\n",
      "trajectorySteps 18\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [6 0 0 0 0]\n",
      " [4 0 2 0 0]\n",
      " [3 2 1 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "🔄⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0248 -0.0364 -0.024  -0.0082 -0.0126]\n",
      " [-0.048  -0.0188 -0.0094 -0.009  -0.0029]\n",
      " [-0.0309 -0.0597  0.0406 -0.0103 -0.0026]\n",
      " [-0.0463  0.0927  0.3708  0.0205 -0.0054]\n",
      " [-0.0372 -0.0049  0.1088  0.001  -0.0008]]\n",
      "mean_state_value 0.009686901203044496\n",
      "episode 302/600\n",
      "p1 0.8424000000000003 p0 0.03939999999999995\n",
      "trajectorySteps 34\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [30  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬆️\n",
      "⬆️⏩️✅⏪⬆️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0248 -0.0362 -0.0239 -0.0081 -0.0125]\n",
      " [-0.0478 -0.0187 -0.0094 -0.009  -0.0029]\n",
      " [-0.0308 -0.0595  0.0407 -0.0103 -0.0026]\n",
      " [-0.0466  0.0939  0.3729  0.0205 -0.0053]\n",
      " [-0.037  -0.0048  0.1089  0.001  -0.0008]]\n",
      "mean_state_value 0.009867879870797172\n",
      "episode 303/600\n",
      "p1 0.8432000000000002 p0 0.03919999999999994\n",
      "trajectorySteps 799\n",
      "[[  1   1   1   1   9]\n",
      " [  1   0   1  18 327]\n",
      " [ 54   1   0  16 349]\n",
      " [  0   0   2   1  16]\n",
      " [  0   0   0   0   0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️🔄\n",
      "➡️⬅️⏬🔄🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0247 -0.0361 -0.0238 -0.0081 -0.0125]\n",
      " [-0.0476 -0.0187 -0.0094 -0.0093 -0.0034]\n",
      " [-0.0309 -0.0592  0.0408 -0.0103 -0.0034]\n",
      " [-0.0464  0.094   0.3749  0.0217 -0.0057]\n",
      " [-0.0369 -0.0048  0.109   0.001  -0.0008]]\n",
      "mean_state_value 0.009977335277526188\n",
      "episode 304/600\n",
      "p1 0.8440000000000002 p0 0.038999999999999944\n",
      "trajectorySteps 8\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 2 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️🔄\n",
      "➡️⬅️⏬🔄🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0247 -0.0359 -0.0237 -0.0081 -0.0124]\n",
      " [-0.0474 -0.0186 -0.0093 -0.0093 -0.0033]\n",
      " [-0.0309 -0.0593  0.0419 -0.0102 -0.0034]\n",
      " [-0.0462  0.0941  0.3769  0.0217 -0.0057]\n",
      " [-0.0368 -0.0048  0.1092  0.001  -0.0008]]\n",
      "mean_state_value 0.010163902438915644\n",
      "episode 305/600\n",
      "p1 0.8448000000000002 p0 0.038799999999999946\n",
      "trajectorySteps 41\n",
      "[[ 1  1  1  1  0]\n",
      " [ 1  0  0  1  0]\n",
      " [13 12  1  6  0]\n",
      " [ 1  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️🔄\n",
      "⬆️🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0246 -0.0358 -0.0236 -0.008  -0.0123]\n",
      " [-0.0472 -0.0185 -0.0093 -0.0092 -0.0033]\n",
      " [-0.0309 -0.0592  0.0431 -0.0106 -0.0034]\n",
      " [-0.0461  0.0943  0.379   0.0217 -0.0057]\n",
      " [-0.0367 -0.0047  0.1093  0.001  -0.0008]]\n",
      "mean_state_value 0.010341263469863431\n",
      "episode 306/600\n",
      "p1 0.8456000000000002 p0 0.03859999999999995\n",
      "trajectorySteps 22\n",
      "[[1 3 1 2 0]\n",
      " [1 0 0 4 0]\n",
      " [3 0 0 2 0]\n",
      " [2 0 2 1 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️🔄\n",
      "⬆️🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0246 -0.0357 -0.0235 -0.008  -0.0123]\n",
      " [-0.047  -0.0184 -0.0092 -0.0092 -0.0033]\n",
      " [-0.0309 -0.0589  0.0432 -0.0109 -0.0034]\n",
      " [-0.0459  0.0944  0.381   0.0229 -0.0056]\n",
      " [-0.0365 -0.0047  0.1094  0.001  -0.0008]]\n",
      "mean_state_value 0.010525212129503534\n",
      "episode 307/600\n",
      "p1 0.8464000000000003 p0 0.03839999999999995\n",
      "trajectorySteps 16\n",
      "[[2 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [1 0 0 3 2]\n",
      " [0 0 2 1 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️🔄\n",
      "⬆️🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0246 -0.0355 -0.0234 -0.008  -0.0122]\n",
      " [-0.0468 -0.0183 -0.0092 -0.0092 -0.0033]\n",
      " [-0.0309 -0.0586  0.0433 -0.0112 -0.0034]\n",
      " [-0.0457  0.0946  0.383   0.0241 -0.0056]\n",
      " [-0.0364 -0.0047  0.1095  0.0011 -0.0008]]\n",
      "mean_state_value 0.010711800112937214\n",
      "episode 308/600\n",
      "p1 0.8472000000000002 p0 0.03819999999999994\n",
      "trajectorySteps 76\n",
      "[[ 1  1  1  1  3]\n",
      " [ 2  0  0  2 48]\n",
      " [ 1  0  0  1  9]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️⬅️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️🔄\n",
      "⬆️🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0245 -0.0354 -0.0233 -0.008  -0.0122]\n",
      " [-0.0466 -0.0183 -0.0091 -0.0091 -0.0035]\n",
      " [-0.0308 -0.0584  0.0433 -0.0112 -0.0034]\n",
      " [-0.0455  0.0947  0.3839  0.0241 -0.0056]\n",
      " [-0.0363 -0.0046  0.1107  0.0012 -0.0008]]\n",
      "mean_state_value 0.010858825215915831\n",
      "episode 309/600\n",
      "p1 0.8480000000000002 p0 0.037999999999999944\n",
      "trajectorySteps 33\n",
      "[[ 1 12  9  0  0]\n",
      " [ 1  0  2  0  0]\n",
      " [ 3  0  1  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️⬅️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️🔄\n",
      "⬆️🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0245 -0.0353 -0.0235 -0.0079 -0.0121]\n",
      " [-0.0464 -0.0182 -0.0098 -0.0091 -0.0035]\n",
      " [-0.0308 -0.0581  0.0445 -0.0111 -0.0034]\n",
      " [-0.0454  0.0948  0.386   0.0241 -0.0055]\n",
      " [-0.0361 -0.0046  0.1108  0.0012 -0.0008]]\n",
      "mean_state_value 0.011012768472203047\n",
      "episode 310/600\n",
      "p1 0.8488000000000002 p0 0.037799999999999945\n",
      "trajectorySteps 102\n",
      "[[28 31  1  1  1]\n",
      " [ 2  2  0  1 15]\n",
      " [ 1  0  0  1 12]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️🔄\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0247 -0.036  -0.0234 -0.0079 -0.0121]\n",
      " [-0.0463 -0.0181 -0.0098 -0.009  -0.0035]\n",
      " [-0.0308 -0.0578  0.0446 -0.011  -0.0034]\n",
      " [-0.0452  0.095   0.388   0.0241 -0.0055]\n",
      " [-0.036  -0.0046  0.112   0.0013 -0.0008]]\n",
      "mean_state_value 0.011166698546286914\n",
      "episode 311/600\n",
      "p1 0.8496000000000002 p0 0.037599999999999946\n",
      "trajectorySteps 46\n",
      "[[ 7  1  1  1  1]\n",
      " [ 5  0  0  1 20]\n",
      " [ 2  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬇️⏪⏩️➡️🔄\n",
      "⬆️🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0248 -0.0358 -0.0233 -0.0079 -0.012 ]\n",
      " [-0.0461 -0.0181 -0.0097 -0.009  -0.0035]\n",
      " [-0.0307 -0.0576  0.0446 -0.011  -0.0034]\n",
      " [-0.045   0.0951  0.389   0.0242 -0.0055]\n",
      " [-0.0359 -0.0045  0.1132  0.0014 -0.0008]]\n",
      "mean_state_value 0.011318964133504835\n",
      "episode 312/600\n",
      "p1 0.8504000000000003 p0 0.03739999999999995\n",
      "trajectorySteps 52\n",
      "[[ 2  0  0  0  0]\n",
      " [20  1  0  0  0]\n",
      " [19  7  1  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "🔄⏬⏩️➡️🔄\n",
      "⬇️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0248 -0.0357 -0.0232 -0.0078 -0.0119]\n",
      " [-0.0463 -0.018  -0.0097 -0.0089 -0.0035]\n",
      " [-0.0308 -0.0577  0.0458 -0.0109 -0.0034]\n",
      " [-0.0448  0.0953  0.391   0.0242 -0.0055]\n",
      " [-0.0357 -0.0045  0.1134  0.0014 -0.0008]]\n",
      "mean_state_value 0.01148217043448728\n",
      "episode 313/600\n",
      "p1 0.8512000000000002 p0 0.03719999999999994\n",
      "trajectorySteps 157\n",
      "[[ 0  0  0  0  0]\n",
      " [13  1  0  0  0]\n",
      " [51  2  0  0  0]\n",
      " [52  1  2  0  0]\n",
      " [35  0  0  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️🔄\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0248 -0.0355 -0.0231 -0.0078 -0.0119]\n",
      " [-0.0465 -0.0179 -0.0096 -0.0089 -0.0035]\n",
      " [-0.031  -0.0575  0.0459 -0.0109 -0.0034]\n",
      " [-0.0458  0.0965  0.3931  0.0242 -0.0054]\n",
      " [-0.0363 -0.0045  0.1135  0.0014 -0.0008]]\n",
      "mean_state_value 0.011588381260269936\n",
      "episode 314/600\n",
      "p1 0.8520000000000002 p0 0.03699999999999994\n",
      "trajectorySteps 42\n",
      "[[ 0  1  1  1  0]\n",
      " [ 0  1  0  1 10]\n",
      " [10  7  0  0  2]\n",
      " [ 2  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️🔄\n",
      "🔄🔄⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0247 -0.0354 -0.023  -0.0077 -0.0118]\n",
      " [-0.0463 -0.0179 -0.0096 -0.0089 -0.0035]\n",
      " [-0.031  -0.0577  0.046  -0.0108 -0.0033]\n",
      " [-0.0456  0.0966  0.3951  0.0242 -0.0054]\n",
      " [-0.0362 -0.0044  0.1147  0.0015 -0.0008]]\n",
      "mean_state_value 0.01177354167112436\n",
      "episode 315/600\n",
      "p1 0.8528000000000002 p0 0.036799999999999944\n",
      "trajectorySteps 65\n",
      "[[11  1  1  1  0]\n",
      " [ 1  1  0  1 11]\n",
      " [ 4 11  0  0 18]\n",
      " [ 0  0  2  1  1]\n",
      " [ 0  0  0  0  0]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️🔄\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0247 -0.0352 -0.0229 -0.0077 -0.0118]\n",
      " [-0.0461 -0.0178 -0.0095 -0.0088 -0.0036]\n",
      " [-0.0309 -0.0578  0.0461 -0.0108 -0.0033]\n",
      " [-0.0454  0.0968  0.3961  0.0254 -0.0057]\n",
      " [-0.036  -0.0044  0.1148  0.0016 -0.0008]]\n",
      "mean_state_value 0.011901308111752966\n",
      "episode 316/600\n",
      "p1 0.8536000000000002 p0 0.036599999999999945\n",
      "trajectorySteps 48\n",
      "[[ 3  1  1  0  0]\n",
      " [ 3  0  1  1  0]\n",
      " [27  1  0  1  1]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  0  2  1  1]]\n",
      "⬇️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0246 -0.0351 -0.0231 -0.0077 -0.0117]\n",
      " [-0.0459 -0.0177 -0.0095 -0.0088 -0.0035]\n",
      " [-0.0311 -0.0576  0.0461 -0.0107 -0.0033]\n",
      " [-0.0452  0.0969  0.3982  0.0255 -0.0057]\n",
      " [-0.0359 -0.0044  0.116   0.0017 -0.0007]]\n",
      "mean_state_value 0.01208259816318302\n",
      "episode 317/600\n",
      "p1 0.8544000000000003 p0 0.036399999999999946\n",
      "trajectorySteps 69\n",
      "[[12  1  1  1  2]\n",
      " [13  1  0  3 22]\n",
      " [ 5  1  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️⬅️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0247 -0.0349 -0.023  -0.0077 -0.0116]\n",
      " [-0.0462 -0.0176 -0.0094 -0.0087 -0.0036]\n",
      " [-0.0311 -0.0573  0.0462 -0.0106 -0.0033]\n",
      " [-0.045   0.0971  0.4002  0.0255 -0.0057]\n",
      " [-0.0358 -0.0043  0.1172  0.0018 -0.0007]]\n",
      "mean_state_value 0.012266720015077897\n",
      "episode 318/600\n",
      "p1 0.8552000000000002 p0 0.03619999999999994\n",
      "trajectorySteps 101\n",
      "[[61  1  1  1  0]\n",
      " [ 1  0  0 10 10]\n",
      " [ 7  0  0  1  2]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️⬇️🔄\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0249 -0.0348 -0.0229 -0.0076 -0.0116]\n",
      " [-0.046  -0.0176 -0.0094 -0.0087 -0.0036]\n",
      " [-0.0311 -0.057   0.0463 -0.0106 -0.0033]\n",
      " [-0.0449  0.0972  0.4023  0.0255 -0.0056]\n",
      " [-0.0356 -0.0043  0.1184  0.0019 -0.0007]]\n",
      "mean_state_value 0.01246083346409702\n",
      "episode 319/600\n",
      "p1 0.8560000000000002 p0 0.03599999999999994\n",
      "trajectorySteps 25\n",
      "[[1 1 1 1 0]\n",
      " [2 0 0 1 0]\n",
      " [9 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️⬇️🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0248 -0.0346 -0.0228 -0.0076 -0.0115]\n",
      " [-0.0458 -0.0175 -0.0094 -0.0087 -0.0035]\n",
      " [-0.0311 -0.0568  0.0463 -0.0105 -0.0033]\n",
      " [-0.0447  0.0973  0.4044  0.0255 -0.0056]\n",
      " [-0.0355 -0.0043  0.1196  0.002  -0.0007]]\n",
      "mean_state_value 0.012665250910557318\n",
      "episode 320/600\n",
      "p1 0.8568000000000002 p0 0.03579999999999994\n",
      "trajectorySteps 14\n",
      "[[1 1 3 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 0]\n",
      " [0 0 2 1 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️🔄\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0248 -0.0344 -0.0227 -0.0076 -0.0114]\n",
      " [-0.0456 -0.0174 -0.0093 -0.0086 -0.0035]\n",
      " [-0.0311 -0.0565  0.0464 -0.0108 -0.0033]\n",
      " [-0.0445  0.0975  0.4065  0.0267 -0.0056]\n",
      " [-0.0354 -0.0042  0.1198  0.002  -0.0007]]\n",
      "mean_state_value 0.012858691930513493\n",
      "episode 321/600\n",
      "p1 0.8576000000000003 p0 0.035599999999999944\n",
      "trajectorySteps 125\n",
      "[[ 4  3  2  2  7]\n",
      " [ 3  1  1  3 72]\n",
      " [16  3  0  0  1]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏪⏩️⬇️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0248 -0.0343 -0.0226 -0.0075 -0.0114]\n",
      " [-0.0454 -0.0173 -0.0096 -0.0089 -0.0037]\n",
      " [-0.0311 -0.0563  0.0465 -0.0108 -0.0033]\n",
      " [-0.0443  0.0976  0.4075  0.0268 -0.0055]\n",
      " [-0.0352 -0.0042  0.121   0.0021 -0.0007]]\n",
      "mean_state_value 0.012977393010613942\n",
      "episode 322/600\n",
      "p1 0.8584000000000003 p0 0.035399999999999945\n",
      "trajectorySteps 24\n",
      "[[2 2 1 1 0]\n",
      " [4 2 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬅️\n",
      "➡️⬅️⏬⬆️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0247 -0.0345 -0.0225 -0.0075 -0.0113]\n",
      " [-0.0456 -0.0173 -0.0096 -0.0089 -0.0037]\n",
      " [-0.031  -0.056   0.0465 -0.0107 -0.0033]\n",
      " [-0.0441  0.0978  0.4096  0.0268 -0.0055]\n",
      " [-0.0351 -0.0041  0.1222  0.0023 -0.0007]]\n",
      "mean_state_value 0.01315885050545602\n",
      "episode 323/600\n",
      "p1 0.8592000000000002 p0 0.03519999999999994\n",
      "trajectorySteps 105\n",
      "[[ 0  1  1  1  2]\n",
      " [ 0  1  0 28 31]\n",
      " [10  8  0  2  8]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  2  4  3]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️🔄🔄\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0247 -0.0344 -0.0224 -0.0075 -0.0113]\n",
      " [-0.0454 -0.0172 -0.0095 -0.0089 -0.0038]\n",
      " [-0.031  -0.0562  0.0466 -0.0107 -0.0033]\n",
      " [-0.0439  0.0979  0.4116  0.0268 -0.0055]\n",
      " [-0.035  -0.0041  0.1234  0.0025 -0.0007]]\n",
      "mean_state_value 0.013345921980221784\n",
      "episode 324/600\n",
      "p1 0.8600000000000002 p0 0.03499999999999994\n",
      "trajectorySteps 40\n",
      "[[ 0  1  1  1  0]\n",
      " [14  1  0  1  0]\n",
      " [14  0  0  1  1]\n",
      " [ 1  0  2  1  1]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️🔄🔄\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0246 -0.0342 -0.0223 -0.0074 -0.0112]\n",
      " [-0.0455 -0.0171 -0.0094 -0.0088 -0.0038]\n",
      " [-0.031  -0.0559  0.0467 -0.0106 -0.0033]\n",
      " [-0.0437  0.0981  0.4137  0.028  -0.0058]\n",
      " [-0.0348 -0.0041  0.1236  0.0025 -0.0007]]\n",
      "mean_state_value 0.013526105471055365\n",
      "episode 325/600\n",
      "p1 0.8608000000000002 p0 0.03479999999999994\n",
      "trajectorySteps 50\n",
      "[[ 1  1  1  1  2]\n",
      " [ 2  0  1  3 26]\n",
      " [ 4  0  0  1  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️🔄🔄\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0246 -0.0341 -0.0221 -0.0074 -0.0112]\n",
      " [-0.0453 -0.0171 -0.0094 -0.0091 -0.0039]\n",
      " [-0.0309 -0.0557  0.0467 -0.0105 -0.0032]\n",
      " [-0.0435  0.0982  0.4159  0.0281 -0.0058]\n",
      " [-0.0347 -0.004   0.1248  0.0026 -0.0007]]\n",
      "mean_state_value 0.01371759945024436\n",
      "episode 326/600\n",
      "p1 0.8616000000000003 p0 0.03459999999999994\n",
      "trajectorySteps 70\n",
      "[[ 1  1  1  3  0]\n",
      " [ 1  0  1 31 20]\n",
      " [ 1  0  0  2  2]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️⬇️🔄\n",
      "🔄⬅️⏬🔄🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0245 -0.0339 -0.022  -0.0074 -0.0111]\n",
      " [-0.0451 -0.017  -0.0094 -0.0094 -0.0039]\n",
      " [-0.0309 -0.0554  0.0468 -0.0105 -0.0032]\n",
      " [-0.0433  0.0983  0.418   0.0281 -0.0057]\n",
      " [-0.0346 -0.004   0.126   0.0027 -0.0007]]\n",
      "mean_state_value 0.013911912687418444\n",
      "episode 327/600\n",
      "p1 0.8624000000000003 p0 0.034399999999999945\n",
      "trajectorySteps 43\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [35  2  0  0  0]\n",
      " [ 3  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️⬇️🔄\n",
      "➡️⬅️⏬🔄🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0245 -0.0337 -0.0219 -0.0073 -0.011 ]\n",
      " [-0.0449 -0.0169 -0.0093 -0.0094 -0.0039]\n",
      " [-0.0311 -0.0551  0.0469 -0.0104 -0.0032]\n",
      " [-0.0435  0.0996  0.4201  0.0281 -0.0057]\n",
      " [-0.0344 -0.004   0.1262  0.0027 -0.0007]]\n",
      "mean_state_value 0.014098899381150108\n",
      "episode 328/600\n",
      "p1 0.8632000000000002 p0 0.03419999999999994\n",
      "trajectorySteps 32\n",
      "[[ 1  1  1  1  0]\n",
      " [ 1  0  1  5  0]\n",
      " [ 1  0  1 17  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏬⏩️➡️🔄\n",
      "➡️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0244 -0.0336 -0.0218 -0.0073 -0.011 ]\n",
      " [-0.0447 -0.0168 -0.0093 -0.0097 -0.0039]\n",
      " [-0.031  -0.0549  0.0481 -0.0107 -0.0032]\n",
      " [-0.0433  0.0997  0.4222  0.0282 -0.0057]\n",
      " [-0.0343 -0.0039  0.1263  0.0028 -0.0007]]\n",
      "mean_state_value 0.014282536364241321\n",
      "episode 329/600\n",
      "p1 0.8640000000000002 p0 0.03399999999999994\n",
      "trajectorySteps 189\n",
      "[[  2   1   1   2   5]\n",
      " [  2   0   1   8 130]\n",
      " [ 12   9   0   0  10]\n",
      " [  0   0   2   0   1]\n",
      " [  0   0   1   1   1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏬⏩️➡️⬅️\n",
      "🔄🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0244 -0.0334 -0.0217 -0.0073 -0.0109]\n",
      " [-0.0445 -0.0167 -0.0092 -0.01   -0.004 ]\n",
      " [-0.031  -0.0547  0.0482 -0.0107 -0.0032]\n",
      " [-0.0431  0.0999  0.4243  0.0282 -0.0056]\n",
      " [-0.0342 -0.0039  0.1275  0.0029 -0.0007]]\n",
      "mean_state_value 0.014467696861527775\n",
      "episode 330/600\n",
      "p1 0.8648000000000002 p0 0.03379999999999994\n",
      "trajectorySteps 65\n",
      "[[ 0  0  0  0  0]\n",
      " [30  2  0  0  0]\n",
      " [ 6 23  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬅️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0243 -0.0332 -0.0216 -0.0073 -0.0109]\n",
      " [-0.0447 -0.0167 -0.0092 -0.0099 -0.0039]\n",
      " [-0.031  -0.0552  0.0483 -0.0106 -0.0032]\n",
      " [-0.0429  0.1012  0.4264  0.0282 -0.0056]\n",
      " [-0.034  -0.0039  0.1277  0.0029 -0.0007]]\n",
      "mean_state_value 0.014634165279710258\n",
      "episode 331/600\n",
      "p1 0.8656000000000003 p0 0.03359999999999994\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [8 1 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0243 -0.0331 -0.0215 -0.0072 -0.0108]\n",
      " [-0.0445 -0.0166 -0.0091 -0.0099 -0.0039]\n",
      " [-0.031  -0.0552  0.0483 -0.0105 -0.0032]\n",
      " [-0.0427  0.1024  0.4285  0.0282 -0.0056]\n",
      " [-0.0339 -0.0038  0.1278  0.0029 -0.0007]]\n",
      "mean_state_value 0.014832760173065547\n",
      "episode 332/600\n",
      "p1 0.8664000000000003 p0 0.033399999999999944\n",
      "trajectorySteps 21\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 3 3]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0242 -0.0329 -0.0214 -0.0072 -0.0107]\n",
      " [-0.0443 -0.0165 -0.0091 -0.0098 -0.004 ]\n",
      " [-0.031  -0.0549  0.0484 -0.0105 -0.0032]\n",
      " [-0.0425  0.1026  0.4307  0.0283 -0.0055]\n",
      " [-0.0338 -0.0038  0.1291  0.003  -0.0007]]\n",
      "mean_state_value 0.015043256599191519\n",
      "episode 333/600\n",
      "p1 0.8672000000000002 p0 0.03319999999999994\n",
      "trajectorySteps 5\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 1 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬅️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0242 -0.0328 -0.0212 -0.0072 -0.0107]\n",
      " [-0.044  -0.0164 -0.009  -0.0098 -0.0039]\n",
      " [-0.0309 -0.055   0.0485 -0.0104 -0.0032]\n",
      " [-0.0423  0.1038  0.4328  0.0283 -0.0055]\n",
      " [-0.0336 -0.0038  0.1292  0.0031 -0.0007]]\n",
      "mean_state_value 0.015244359977765553\n",
      "episode 334/600\n",
      "p1 0.8680000000000002 p0 0.03299999999999994\n",
      "trajectorySteps 24\n",
      "[[1 2 1 1 0]\n",
      " [3 0 0 5 4]\n",
      " [3 0 0 1 0]\n",
      " [0 0 2 1 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0241 -0.0326 -0.0211 -0.0071 -0.0106]\n",
      " [-0.0438 -0.0164 -0.009  -0.0097 -0.0039]\n",
      " [-0.0309 -0.0547  0.0485 -0.0107 -0.0032]\n",
      " [-0.0421  0.104   0.4349  0.0295 -0.0055]\n",
      " [-0.0335 -0.0037  0.1293  0.0031 -0.0007]]\n",
      "mean_state_value 0.015442925434183885\n",
      "episode 335/600\n",
      "p1 0.8688000000000002 p0 0.03279999999999994\n",
      "trajectorySteps 24\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 3]\n",
      " [5 0 0 0 2]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬅️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0241 -0.0325 -0.021  -0.0071 -0.0106]\n",
      " [-0.0436 -0.0163 -0.0089 -0.0097 -0.0039]\n",
      " [-0.0308 -0.0544  0.0486 -0.0106 -0.0031]\n",
      " [-0.0419  0.1041  0.4371  0.0296 -0.0054]\n",
      " [-0.0334 -0.0037  0.1306  0.0032 -0.0007]]\n",
      "mean_state_value 0.015656454203461557\n",
      "episode 336/600\n",
      "p1 0.8696000000000003 p0 0.03259999999999994\n",
      "trajectorySteps 67\n",
      "[[ 1  2  1  1  0]\n",
      " [ 1  1  0  1  0]\n",
      " [45  2  0  2  2]\n",
      " [ 2  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬅️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.024  -0.0326 -0.0209 -0.0071 -0.0105]\n",
      " [-0.0434 -0.0162 -0.0089 -0.0096 -0.0039]\n",
      " [-0.0309 -0.0541  0.0487 -0.0105 -0.0031]\n",
      " [-0.0418  0.1043  0.4392  0.0296 -0.0054]\n",
      " [-0.0332 -0.0037  0.1318  0.0033 -0.0007]]\n",
      "mean_state_value 0.015852966326547344\n",
      "episode 337/600\n",
      "p1 0.8704000000000003 p0 0.03239999999999994\n",
      "trajectorySteps 63\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [31  0  2  0  0]\n",
      " [24  2  3  0  0]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "⬆️⏪⏩️➡️⬅️\n",
      "⬆️⬅️⏬➡️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.024  -0.0324 -0.0208 -0.007  -0.0104]\n",
      " [-0.0432 -0.0161 -0.0088 -0.0096 -0.0039]\n",
      " [-0.0309 -0.0539  0.0488 -0.0105 -0.0031]\n",
      " [-0.0417  0.1044  0.4413  0.0296 -0.0054]\n",
      " [-0.0339 -0.0034  0.1328  0.0034 -0.0007]]\n",
      "mean_state_value 0.01602925949738801\n",
      "episode 338/600\n",
      "p1 0.8712000000000002 p0 0.03219999999999994\n",
      "trajectorySteps 92\n",
      "[[ 1  4  1  0  1]\n",
      " [ 2  0  1 14 13]\n",
      " [ 4  0  0  1 43]\n",
      " [ 0  0  2  1  1]\n",
      " [ 0  0  2  1  0]]\n",
      "⬇️⬅️➡️⬇️⬇️\n",
      "⬆️⏪⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0239 -0.0323 -0.021  -0.007  -0.0104]\n",
      " [-0.043  -0.016  -0.0088 -0.0095 -0.0039]\n",
      " [-0.0309 -0.0536  0.0488 -0.0104 -0.0031]\n",
      " [-0.0415  0.1046  0.4435  0.0297 -0.0057]\n",
      " [-0.0337 -0.0034  0.134   0.0035 -0.0007]]\n",
      "mean_state_value 0.016216449134511784\n",
      "episode 339/600\n",
      "p1 0.8720000000000002 p0 0.03199999999999994\n",
      "trajectorySteps 846\n",
      "[[395  23   0   0   0]\n",
      " [384  15   1   1   0]\n",
      " [ 14   2   0   1   1]\n",
      " [  0   0   2   0   1]\n",
      " [  0   0   1   3   2]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0251 -0.0321 -0.0209 -0.007  -0.0103]\n",
      " [-0.0478 -0.0166 -0.0087 -0.0095 -0.0039]\n",
      " [-0.0309 -0.0533  0.0489 -0.0104 -0.0031]\n",
      " [-0.0413  0.1047  0.4456  0.0297 -0.0056]\n",
      " [-0.0336 -0.0033  0.1353  0.0036 -0.0007]]\n",
      "mean_state_value 0.016152136337451325\n",
      "episode 340/600\n",
      "p1 0.8728000000000002 p0 0.03179999999999994\n",
      "trajectorySteps 22\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [10  3  0  0  0]\n",
      " [ 6  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.025  -0.032  -0.0208 -0.0069 -0.0102]\n",
      " [-0.0475 -0.0165 -0.0087 -0.0094 -0.0039]\n",
      " [-0.0309 -0.0531  0.049  -0.0103 -0.0031]\n",
      " [-0.0414  0.106   0.4478  0.0297 -0.0056]\n",
      " [-0.0335 -0.0033  0.1354  0.0036 -0.0007]]\n",
      "mean_state_value 0.016348396095662018\n",
      "episode 341/600\n",
      "p1 0.8736000000000003 p0 0.03159999999999994\n",
      "trajectorySteps 100\n",
      "[[ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [49  5  0  0  0]\n",
      " [41  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0249 -0.0318 -0.0206 -0.0069 -0.0102]\n",
      " [-0.0473 -0.0165 -0.0086 -0.0094 -0.0038]\n",
      " [-0.031  -0.0529  0.049  -0.0102 -0.0031]\n",
      " [-0.0418  0.1073  0.45    0.0297 -0.0056]\n",
      " [-0.0333 -0.0033  0.1356  0.0036 -0.0006]]\n",
      "mean_state_value 0.01653143334813588\n",
      "episode 342/600\n",
      "p1 0.8744000000000003 p0 0.03139999999999994\n",
      "trajectorySteps 71\n",
      "[[ 0  2  1  4  0]\n",
      " [ 3  2  0  2  0]\n",
      " [46  3  0  1  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0249 -0.0319 -0.0205 -0.0069 -0.0101]\n",
      " [-0.0474 -0.0164 -0.0086 -0.0093 -0.0038]\n",
      " [-0.0312 -0.0527  0.0491 -0.0102 -0.0031]\n",
      " [-0.0416  0.1075  0.4521  0.0298 -0.0055]\n",
      " [-0.0332 -0.0032  0.1368  0.0037 -0.0006]]\n",
      "mean_state_value 0.016713455832862326\n",
      "episode 343/600\n",
      "p1 0.8752000000000002 p0 0.03119999999999994\n",
      "trajectorySteps 19\n",
      "[[0 0 0 0 0]\n",
      " [7 0 0 0 0]\n",
      " [8 1 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️⬇️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0248 -0.0318 -0.0204 -0.0069 -0.0101]\n",
      " [-0.0473 -0.0163 -0.0085 -0.0093 -0.0038]\n",
      " [-0.0312 -0.0527  0.0492 -0.0101 -0.003 ]\n",
      " [-0.0414  0.1088  0.4543  0.0298 -0.0055]\n",
      " [-0.0331 -0.0032  0.137   0.0038 -0.0006]]\n",
      "mean_state_value 0.016909909201667718\n",
      "episode 344/600\n",
      "p1 0.8760000000000002 p0 0.030999999999999937\n",
      "trajectorySteps 17\n",
      "[[0 1 1 1 0]\n",
      " [1 1 0 2 0]\n",
      " [1 0 0 2 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0247 -0.0316 -0.0203 -0.0068 -0.01  ]\n",
      " [-0.0473 -0.0163 -0.0085 -0.0092 -0.0038]\n",
      " [-0.0312 -0.0524  0.0492 -0.0101 -0.003 ]\n",
      " [-0.0412  0.1089  0.4565  0.0298 -0.0055]\n",
      " [-0.0329 -0.0032  0.1383  0.0039 -0.0006]]\n",
      "mean_state_value 0.01711947410754911\n",
      "episode 345/600\n",
      "p1 0.8768000000000002 p0 0.03079999999999994\n",
      "trajectorySteps 13\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [6 4 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0247 -0.0314 -0.0202 -0.0068 -0.0099]\n",
      " [-0.0471 -0.0162 -0.0084 -0.0091 -0.0038]\n",
      " [-0.0312 -0.0525  0.0493 -0.01   -0.003 ]\n",
      " [-0.041   0.1102  0.4586  0.0299 -0.0054]\n",
      " [-0.0328 -0.0031  0.1384  0.0039 -0.0006]]\n",
      "mean_state_value 0.01732120337165959\n",
      "episode 346/600\n",
      "p1 0.8776000000000003 p0 0.03059999999999994\n",
      "trajectorySteps 31\n",
      "[[1 2 1 1 0]\n",
      " [7 0 0 1 1]\n",
      " [9 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0246 -0.0313 -0.0201 -0.0068 -0.0099]\n",
      " [-0.047  -0.0161 -0.0084 -0.0091 -0.0038]\n",
      " [-0.0312 -0.0522  0.0494 -0.0099 -0.003 ]\n",
      " [-0.0408  0.1104  0.4608  0.0299 -0.0054]\n",
      " [-0.0326 -0.0031  0.1397  0.004  -0.0006]]\n",
      "mean_state_value 0.017535429906147534\n",
      "episode 347/600\n",
      "p1 0.8784000000000003 p0 0.030399999999999937\n",
      "trajectorySteps 23\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [17  1  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0246 -0.0311 -0.0199 -0.0067 -0.0098]\n",
      " [-0.0467 -0.016  -0.0083 -0.009  -0.0037]\n",
      " [-0.0311 -0.0519  0.0495 -0.0099 -0.003 ]\n",
      " [-0.0407  0.1117  0.463   0.0299 -0.0054]\n",
      " [-0.0331 -0.0034  0.1399  0.0041 -0.0006]]\n",
      "mean_state_value 0.017719156243005352\n",
      "episode 348/600\n",
      "p1 0.8792000000000002 p0 0.03019999999999994\n",
      "trajectorySteps 34\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [ 5  0  2  0  0]\n",
      " [21  1  1  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0245 -0.0309 -0.0198 -0.0067 -0.0097]\n",
      " [-0.0465 -0.016  -0.0083 -0.009  -0.0037]\n",
      " [-0.0311 -0.0516  0.0495 -0.0098 -0.003 ]\n",
      " [-0.0406  0.1118  0.4652  0.0299 -0.0053]\n",
      " [-0.0336 -0.0032  0.1411  0.0041 -0.0006]]\n",
      "mean_state_value 0.017913023699530285\n",
      "episode 349/600\n",
      "p1 0.8800000000000002 p0 0.029999999999999936\n",
      "trajectorySteps 87\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [79  3  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0244 -0.0308 -0.0197 -0.0067 -0.0097]\n",
      " [-0.0462 -0.0159 -0.0082 -0.0089 -0.0037]\n",
      " [-0.0312 -0.0514  0.0496 -0.0097 -0.003 ]\n",
      " [-0.0406  0.1132  0.4665  0.03   -0.0053]\n",
      " [-0.0335 -0.0032  0.1413  0.0041 -0.0006]]\n",
      "mean_state_value 0.018081026932502894\n",
      "episode 350/600\n",
      "p1 0.8808000000000002 p0 0.029799999999999938\n",
      "trajectorySteps 24\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [10 11  0  0  0]\n",
      " [ 0  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0244 -0.0306 -0.0196 -0.0066 -0.0096]\n",
      " [-0.046  -0.0158 -0.0081 -0.0089 -0.0037]\n",
      " [-0.0312 -0.0516  0.0497 -0.0097 -0.003 ]\n",
      " [-0.0404  0.1145  0.4687  0.03   -0.0052]\n",
      " [-0.0333 -0.0031  0.1415  0.0041 -0.0006]]\n",
      "mean_state_value 0.018282167385103163\n",
      "episode 351/600\n",
      "p1 0.8816000000000003 p0 0.02959999999999994\n",
      "trajectorySteps 28\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 3  1  2  0  0]\n",
      " [21  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0243 -0.0304 -0.0195 -0.0066 -0.0096]\n",
      " [-0.0457 -0.0157 -0.0081 -0.0088 -0.0037]\n",
      " [-0.0312 -0.0513  0.0497 -0.0096 -0.0029]\n",
      " [-0.0406  0.1158  0.4709  0.03   -0.0052]\n",
      " [-0.0335 -0.0031  0.1416  0.0041 -0.0006]]\n",
      "mean_state_value 0.018477222805969275\n",
      "episode 352/600\n",
      "p1 0.8824000000000003 p0 0.029399999999999937\n",
      "trajectorySteps 63\n",
      "[[ 0  0  0  0  0]\n",
      " [19  0  0  0  0]\n",
      " [28  3  0  0  0]\n",
      " [ 1  0  2  0  0]\n",
      " [ 8  1  1  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0242 -0.0302 -0.0194 -0.0066 -0.0095]\n",
      " [-0.0459 -0.0156 -0.008  -0.0088 -0.0037]\n",
      " [-0.0313 -0.051   0.0498 -0.0095 -0.0029]\n",
      " [-0.0404  0.116   0.4731  0.0301 -0.0052]\n",
      " [-0.0338 -0.0029  0.1429  0.0042 -0.0006]]\n",
      "mean_state_value 0.018661744546224813\n",
      "episode 353/600\n",
      "p1 0.8832000000000002 p0 0.029199999999999938\n",
      "trajectorySteps 40\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [18  1  2  0  0]\n",
      " [17  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0242 -0.0301 -0.0192 -0.0065 -0.0094]\n",
      " [-0.0456 -0.0155 -0.008  -0.0087 -0.0036]\n",
      " [-0.0313 -0.0507  0.0499 -0.0095 -0.0029]\n",
      " [-0.0407  0.1173  0.4742  0.0301 -0.0051]\n",
      " [-0.0337 -0.0029  0.1431  0.0042 -0.0006]]\n",
      "mean_state_value 0.018815651655726157\n",
      "episode 354/600\n",
      "p1 0.8840000000000002 p0 0.028999999999999936\n",
      "trajectorySteps 47\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [21  1  0  0  0]\n",
      " [18  0  2  0  0]\n",
      " [ 2  1  1  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0241 -0.0299 -0.0191 -0.0065 -0.0094]\n",
      " [-0.0454 -0.0154 -0.0079 -0.0086 -0.0036]\n",
      " [-0.0313 -0.0505  0.0499 -0.0094 -0.0029]\n",
      " [-0.0406  0.1174  0.4764  0.0301 -0.0051]\n",
      " [-0.0339 -0.0027  0.1444  0.0042 -0.0006]]\n",
      "mean_state_value 0.019021644720317338\n",
      "episode 355/600\n",
      "p1 0.8848000000000003 p0 0.028799999999999937\n",
      "trajectorySteps 14\n",
      "[[0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [6 3 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.024  -0.0297 -0.019  -0.0064 -0.0093]\n",
      " [-0.0452 -0.0154 -0.0079 -0.0086 -0.0036]\n",
      " [-0.0314 -0.0505  0.0513 -0.0094 -0.0029]\n",
      " [-0.0404  0.1176  0.4786  0.0302 -0.0051]\n",
      " [-0.0337 -0.0027  0.1445  0.0042 -0.0006]]\n",
      "mean_state_value 0.019229795152397474\n",
      "episode 356/600\n",
      "p1 0.8856000000000003 p0 0.028599999999999938\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [2 1 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.024  -0.0295 -0.0189 -0.0064 -0.0092]\n",
      " [-0.0449 -0.0153 -0.0078 -0.0085 -0.0036]\n",
      " [-0.0313 -0.0505  0.0526 -0.0093 -0.0029]\n",
      " [-0.0401  0.1178  0.4809  0.0302 -0.005 ]\n",
      " [-0.0336 -0.0027  0.1447  0.0042 -0.0006]]\n",
      "mean_state_value 0.01944349733557786\n",
      "episode 357/600\n",
      "p1 0.8864000000000003 p0 0.028399999999999936\n",
      "trajectorySteps 33\n",
      "[[ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [16 12  0  0  0]\n",
      " [ 0  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "🔄🔄⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0239 -0.0294 -0.0188 -0.0064 -0.0092]\n",
      " [-0.0447 -0.0152 -0.0078 -0.0085 -0.0036]\n",
      " [-0.0313 -0.0507  0.0527 -0.0092 -0.0028]\n",
      " [-0.0399  0.1191  0.4831  0.0302 -0.005 ]\n",
      " [-0.0334 -0.0026  0.1448  0.0043 -0.0006]]\n",
      "mean_state_value 0.01964573690101422\n",
      "episode 358/600\n",
      "p1 0.8872000000000002 p0 0.028199999999999937\n",
      "trajectorySteps 19\n",
      "[[0 1 2 1 1]\n",
      " [0 1 0 0 2]\n",
      " [1 2 0 0 2]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "🔄⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0238 -0.0292 -0.0187 -0.0063 -0.0091]\n",
      " [-0.0445 -0.0151 -0.0077 -0.0084 -0.0036]\n",
      " [-0.0313 -0.0506  0.0527 -0.0092 -0.0028]\n",
      " [-0.0397  0.1193  0.4853  0.0302 -0.005 ]\n",
      " [-0.0333 -0.0026  0.1462  0.0044 -0.0006]]\n",
      "mean_state_value 0.019862039412056127\n",
      "episode 359/600\n",
      "p1 0.8880000000000002 p0 0.027999999999999935\n",
      "trajectorySteps 78\n",
      "[[ 0  0  0  0  0]\n",
      " [ 6  1  0  0  0]\n",
      " [42  2  0  0  0]\n",
      " [24  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0238 -0.029  -0.0186 -0.0063 -0.0091]\n",
      " [-0.0445 -0.0151 -0.0077 -0.0084 -0.0035]\n",
      " [-0.0313 -0.0506  0.0528 -0.0091 -0.0028]\n",
      " [-0.0396  0.1206  0.4875  0.0303 -0.0049]\n",
      " [-0.0331 -0.0025  0.1463  0.0044 -0.0006]]\n",
      "mean_state_value 0.020056254719349263\n",
      "episode 360/600\n",
      "p1 0.8888000000000003 p0 0.027799999999999936\n",
      "trajectorySteps 21\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [ 1  0  2  0  0]\n",
      " [14  1  1  0  0]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0237 -0.0289 -0.0184 -0.0063 -0.009 ]\n",
      " [-0.0443 -0.015  -0.0076 -0.0083 -0.0035]\n",
      " [-0.0313 -0.0503  0.0529 -0.009  -0.0028]\n",
      " [-0.0394  0.1208  0.4897  0.0303 -0.0049]\n",
      " [-0.0335 -0.0024  0.1476  0.0044 -0.0006]]\n",
      "mean_state_value 0.020263389877243423\n",
      "episode 361/600\n",
      "p1 0.8896000000000003 p0 0.027599999999999937\n",
      "trajectorySteps 10\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [2 0 2 0 0]\n",
      " [2 1 1 0 0]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0236 -0.0287 -0.0183 -0.0062 -0.0089]\n",
      " [-0.044  -0.0149 -0.0076 -0.0083 -0.0035]\n",
      " [-0.0313 -0.05    0.053  -0.009  -0.0028]\n",
      " [-0.0392  0.1209  0.492   0.0303 -0.0049]\n",
      " [-0.0336 -0.0022  0.149   0.0045 -0.0005]]\n",
      "mean_state_value 0.020480463856792603\n",
      "episode 362/600\n",
      "p1 0.8904000000000003 p0 0.027399999999999935\n",
      "trajectorySteps 21\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [ 1  0  2  0  0]\n",
      " [13  1  1  0  0]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬇️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0236 -0.0285 -0.0182 -0.0062 -0.0089]\n",
      " [-0.0438 -0.0148 -0.0075 -0.0082 -0.0035]\n",
      " [-0.0312 -0.0497  0.053  -0.0089 -0.0028]\n",
      " [-0.0391  0.1211  0.4942  0.0304 -0.0048]\n",
      " [-0.0339 -0.0021  0.1503  0.0045 -0.0005]]\n",
      "mean_state_value 0.02069298866710761\n",
      "episode 363/600\n",
      "p1 0.8912000000000002 p0 0.027199999999999936\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [2 1 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0235 -0.0283 -0.0181 -0.0062 -0.0088]\n",
      " [-0.0435 -0.0147 -0.0075 -0.0082 -0.0035]\n",
      " [-0.0312 -0.0495  0.0531 -0.0088 -0.0028]\n",
      " [-0.0391  0.1224  0.4964  0.0304 -0.0048]\n",
      " [-0.0337 -0.002   0.1505  0.0045 -0.0005]]\n",
      "mean_state_value 0.020908315280802982\n",
      "episode 364/600\n",
      "p1 0.8920000000000002 p0 0.026999999999999934\n",
      "trajectorySteps 5\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0234 -0.0282 -0.018  -0.0061 -0.0087]\n",
      " [-0.0433 -0.0146 -0.0074 -0.0081 -0.0035]\n",
      " [-0.0311 -0.0492  0.0532 -0.0088 -0.0028]\n",
      " [-0.0391  0.1238  0.4987  0.0304 -0.0048]\n",
      " [-0.0336 -0.002   0.1506  0.0045 -0.0005]]\n",
      "mean_state_value 0.02112518332560411\n",
      "episode 365/600\n",
      "p1 0.8928000000000003 p0 0.026799999999999935\n",
      "trajectorySteps 48\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [22  0  2  0  0]\n",
      " [21  1  1  0  0]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0234 -0.028  -0.0179 -0.0061 -0.0087]\n",
      " [-0.043  -0.0145 -0.0074 -0.008  -0.0034]\n",
      " [-0.031  -0.0489  0.0532 -0.0087 -0.0027]\n",
      " [-0.0391  0.124   0.4998  0.0304 -0.0047]\n",
      " [-0.0338 -0.0018  0.152   0.0045 -0.0005]]\n",
      "mean_state_value 0.02129177503860225\n",
      "episode 366/600\n",
      "p1 0.8936000000000003 p0 0.026599999999999936\n",
      "trajectorySteps 34\n",
      "[[ 0 10  1  1  0]\n",
      " [ 6  1  0  1  1]\n",
      " [ 6  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬇️🔄⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.330e-02 -2.790e-02 -1.770e-02 -6.000e-03 -8.600e-03]\n",
      " [-4.310e-02 -1.450e-02 -7.300e-03 -8.000e-03 -3.400e-03]\n",
      " [-3.100e-02 -4.860e-02  5.330e-02 -8.700e-03 -2.700e-03]\n",
      " [-3.890e-02  1.241e-01  5.021e-01  3.050e-02 -4.700e-03]\n",
      " [-3.360e-02 -1.800e-03  1.533e-01  4.700e-03 -5.000e-04]]\n",
      "mean_state_value 0.021505521263562137\n",
      "episode 367/600\n",
      "p1 0.8944000000000003 p0 0.026399999999999934\n",
      "trajectorySteps 63\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [23  6  0  0  0]\n",
      " [22  1  2  0  0]\n",
      " [ 9  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.320e-02 -2.770e-02 -1.760e-02 -6.000e-03 -8.500e-03]\n",
      " [-4.280e-02 -1.440e-02 -7.300e-03 -7.900e-03 -3.400e-03]\n",
      " [-3.100e-02 -4.830e-02  5.340e-02 -8.600e-03 -2.700e-03]\n",
      " [-3.920e-02  1.255e-01  5.043e-01  3.050e-02 -4.700e-03]\n",
      " [-3.360e-02 -1.700e-03  1.535e-01  4.700e-03 -5.000e-04]]\n",
      "mean_state_value 0.02170270892276611\n",
      "episode 368/600\n",
      "p1 0.8952000000000002 p0 0.026199999999999935\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 0]\n",
      " [2 0 0 1 1]\n",
      " [3 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.320e-02 -2.750e-02 -1.750e-02 -6.000e-03 -8.500e-03]\n",
      " [-4.260e-02 -1.430e-02 -7.200e-03 -7.900e-03 -3.400e-03]\n",
      " [-3.100e-02 -4.800e-02  5.350e-02 -8.500e-03 -2.700e-03]\n",
      " [-3.900e-02  1.257e-01  5.055e-01  3.050e-02 -4.600e-03]\n",
      " [-3.350e-02 -1.700e-03  1.548e-01  4.800e-03 -5.000e-04]]\n",
      "mean_state_value 0.021888412352451866\n",
      "episode 369/600\n",
      "p1 0.8960000000000002 p0 0.025999999999999933\n",
      "trajectorySteps 61\n",
      "[[ 1  3  4  1  1]\n",
      " [38  0  0  1  2]\n",
      " [ 3  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.310e-02 -2.740e-02 -1.740e-02 -5.900e-03 -8.400e-03]\n",
      " [-4.250e-02 -1.420e-02 -7.200e-03 -7.800e-03 -3.400e-03]\n",
      " [-3.100e-02 -4.770e-02  5.350e-02 -8.500e-03 -2.700e-03]\n",
      " [-3.880e-02  1.258e-01  5.078e-01  3.060e-02 -4.600e-03]\n",
      " [-3.330e-02 -1.700e-03  1.562e-01  5.000e-03 -5.000e-04]]\n",
      "mean_state_value 0.02211110372520049\n",
      "episode 370/600\n",
      "p1 0.8968000000000003 p0 0.025799999999999934\n",
      "trajectorySteps 52\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 4  1  0  0  0]\n",
      " [ 4  1  2  0  0]\n",
      " [40  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.310e-02 -2.720e-02 -1.730e-02 -5.900e-03 -8.400e-03]\n",
      " [-4.220e-02 -1.410e-02 -7.100e-03 -7.800e-03 -3.400e-03]\n",
      " [-3.100e-02 -4.740e-02  5.360e-02 -8.400e-03 -2.700e-03]\n",
      " [-3.880e-02  1.272e-01  5.100e-01  3.060e-02 -4.600e-03]\n",
      " [-3.350e-02 -1.600e-03  1.563e-01  5.000e-03 -5.000e-04]]\n",
      "mean_state_value 0.022314280519717625\n",
      "episode 371/600\n",
      "p1 0.8976000000000003 p0 0.025599999999999935\n",
      "trajectorySteps 29\n",
      "[[0 1 1 1 0]\n",
      " [8 1 0 1 1]\n",
      " [6 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 2 2]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.300e-02 -2.700e-02 -1.720e-02 -5.900e-03 -8.300e-03]\n",
      " [-4.230e-02 -1.410e-02 -7.100e-03 -7.700e-03 -3.400e-03]\n",
      " [-3.090e-02 -4.710e-02  5.370e-02 -8.300e-03 -2.700e-03]\n",
      " [-3.860e-02  1.274e-01  5.123e-01  3.060e-02 -4.500e-03]\n",
      " [-3.330e-02 -1.600e-03  1.577e-01  5.200e-03 -5.000e-04]]\n",
      "mean_state_value 0.02253246472338485\n",
      "episode 372/600\n",
      "p1 0.8984000000000003 p0 0.025399999999999933\n",
      "trajectorySteps 145\n",
      "[[  1   2   1   1   0]\n",
      " [  5   0   0   1   0]\n",
      " [118   4   0   1   1]\n",
      " [  4   0   2   0   1]\n",
      " [  0   0   1   1   1]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬅️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.290e-02 -2.690e-02 -1.710e-02 -5.800e-03 -8.200e-03]\n",
      " [-4.210e-02 -1.400e-02 -7.000e-03 -7.700e-03 -3.300e-03]\n",
      " [-3.100e-02 -4.690e-02  5.370e-02 -8.300e-03 -2.600e-03]\n",
      " [-3.840e-02  1.275e-01  5.138e-01  3.070e-02 -4.500e-03]\n",
      " [-3.320e-02 -1.500e-03  1.590e-01  5.300e-03 -5.000e-04]]\n",
      "mean_state_value 0.02271907304059377\n",
      "episode 373/600\n",
      "p1 0.8992000000000002 p0 0.025199999999999934\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [1 0 0 0 0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬅️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.290e-02 -2.670e-02 -1.700e-02 -5.800e-03 -8.200e-03]\n",
      " [-4.190e-02 -1.390e-02 -7.000e-03 -7.600e-03 -3.300e-03]\n",
      " [-3.100e-02 -4.660e-02  5.380e-02 -8.200e-03 -2.600e-03]\n",
      " [-3.840e-02  1.289e-01  5.160e-01  3.070e-02 -4.500e-03]\n",
      " [-3.310e-02 -1.500e-03  1.592e-01  5.300e-03 -5.000e-04]]\n",
      "mean_state_value 0.02293870211743361\n",
      "episode 374/600\n",
      "p1 0.9000000000000002 p0 0.024999999999999932\n",
      "trajectorySteps 16\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [11  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬅️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.280e-02 -2.650e-02 -1.680e-02 -5.800e-03 -8.100e-03]\n",
      " [-4.160e-02 -1.380e-02 -6.900e-03 -7.600e-03 -3.300e-03]\n",
      " [-3.090e-02 -4.630e-02  5.390e-02 -8.200e-03 -2.600e-03]\n",
      " [-3.850e-02  1.303e-01  5.183e-01  3.070e-02 -4.400e-03]\n",
      " [-3.290e-02 -1.500e-03  1.594e-01  5.400e-03 -5.000e-04]]\n",
      "mean_state_value 0.023157322023964734\n",
      "episode 375/600\n",
      "p1 0.9008000000000003 p0 0.024799999999999933\n",
      "trajectorySteps 44\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [21  1  2  0  0]\n",
      " [19  0  0  0  0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "➡️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.270e-02 -2.640e-02 -1.670e-02 -5.700e-03 -8.000e-03]\n",
      " [-4.140e-02 -1.370e-02 -6.900e-03 -7.500e-03 -3.300e-03]\n",
      " [-3.090e-02 -4.600e-02  5.400e-02 -8.100e-03 -2.600e-03]\n",
      " [-3.860e-02  1.317e-01  5.206e-01  3.070e-02 -4.400e-03]\n",
      " [-3.310e-02 -1.400e-03  1.595e-01  5.400e-03 -5.000e-04]]\n",
      "mean_state_value 0.02336151522790235\n",
      "episode 376/600\n",
      "p1 0.9016000000000003 p0 0.024599999999999934\n",
      "trajectorySteps 7\n",
      "[[0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [2 1 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "🔄➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬅️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.270e-02 -2.620e-02 -1.660e-02 -5.700e-03 -8.000e-03]\n",
      " [-4.110e-02 -1.370e-02 -6.800e-03 -7.400e-03 -3.300e-03]\n",
      " [-3.080e-02 -4.590e-02  5.400e-02 -8.000e-03 -2.600e-03]\n",
      " [-3.840e-02  1.331e-01  5.229e-01  3.080e-02 -4.300e-03]\n",
      " [-3.290e-02 -1.400e-03  1.597e-01  5.400e-03 -5.000e-04]]\n",
      "mean_state_value 0.023581540196471507\n",
      "episode 377/600\n",
      "p1 0.9024000000000003 p0 0.024399999999999932\n",
      "trajectorySteps 347\n",
      "[[ 12   1   1   1   0]\n",
      " [111   3   0   3   2]\n",
      " [116   6   0  41  44]\n",
      " [  3   0   2   1   0]\n",
      " [  0   0   0   0   0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "➡️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.260e-02 -2.600e-02 -1.650e-02 -5.700e-03 -7.900e-03]\n",
      " [-4.250e-02 -1.360e-02 -6.800e-03 -7.400e-03 -3.300e-03]\n",
      " [-3.090e-02 -4.570e-02  5.410e-02 -8.200e-03 -2.600e-03]\n",
      " [-3.820e-02  1.333e-01  5.252e-01  3.210e-02 -4.300e-03]\n",
      " [-3.280e-02 -1.400e-03  1.599e-01  5.400e-03 -5.000e-04]]\n",
      "mean_state_value 0.023727348141305723\n",
      "episode 378/600\n",
      "p1 0.9032000000000002 p0 0.024199999999999933\n",
      "trajectorySteps 14\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 8 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.250e-02 -2.580e-02 -1.640e-02 -5.600e-03 -7.900e-03]\n",
      " [-4.220e-02 -1.350e-02 -6.700e-03 -7.300e-03 -3.200e-03]\n",
      " [-3.090e-02 -4.570e-02  5.420e-02 -8.200e-03 -2.600e-03]\n",
      " [-3.800e-02  1.347e-01  5.275e-01  3.220e-02 -4.300e-03]\n",
      " [-3.260e-02 -1.300e-03  1.600e-01  5.400e-03 -4.000e-04]]\n",
      "mean_state_value 0.023944098170279027\n",
      "episode 379/600\n",
      "p1 0.9040000000000002 p0 0.02399999999999993\n",
      "trajectorySteps 11\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [5 3 0 0 0]\n",
      " [0 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.250e-02 -2.570e-02 -1.630e-02 -5.600e-03 -7.800e-03]\n",
      " [-4.200e-02 -1.340e-02 -6.600e-03 -7.300e-03 -3.200e-03]\n",
      " [-3.090e-02 -4.570e-02  5.430e-02 -8.100e-03 -2.600e-03]\n",
      " [-3.780e-02  1.361e-01  5.287e-01  3.220e-02 -4.200e-03]\n",
      " [-3.250e-02 -1.300e-03  1.602e-01  5.500e-03 -4.000e-04]]\n",
      "mean_state_value 0.024121789702507047\n",
      "episode 380/600\n",
      "p1 0.9048000000000003 p0 0.023799999999999932\n",
      "trajectorySteps 48\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [16  1  0  0  0]\n",
      " [17  1  2  0  0]\n",
      " [11  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.240e-02 -2.550e-02 -1.610e-02 -5.600e-03 -7.700e-03]\n",
      " [-4.170e-02 -1.330e-02 -6.600e-03 -7.200e-03 -3.200e-03]\n",
      " [-3.090e-02 -4.540e-02  5.430e-02 -8.000e-03 -2.600e-03]\n",
      " [-3.790e-02  1.375e-01  5.310e-01  3.220e-02 -4.200e-03]\n",
      " [-3.250e-02 -1.200e-03  1.604e-01  5.500e-03 -4.000e-04]]\n",
      "mean_state_value 0.024328986432979913\n",
      "episode 381/600\n",
      "p1 0.9056000000000003 p0 0.023599999999999934\n",
      "trajectorySteps 5\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 1 1 0 0]\n",
      " [0 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.230e-02 -2.530e-02 -1.600e-02 -5.500e-03 -7.700e-03]\n",
      " [-4.140e-02 -1.320e-02 -6.500e-03 -7.200e-03 -3.200e-03]\n",
      " [-3.090e-02 -4.530e-02  5.570e-02 -8.000e-03 -2.600e-03]\n",
      " [-3.770e-02  1.376e-01  5.333e-01  3.230e-02 -4.200e-03]\n",
      " [-3.230e-02 -1.200e-03  1.605e-01  5.500e-03 -4.000e-04]]\n",
      "mean_state_value 0.024555432983691238\n",
      "episode 382/600\n",
      "p1 0.9064000000000003 p0 0.02339999999999993\n",
      "trajectorySteps 16\n",
      "[[0 1 1 1 0]\n",
      " [0 1 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.230e-02 -2.520e-02 -1.590e-02 -5.500e-03 -7.600e-03]\n",
      " [-4.120e-02 -1.320e-02 -6.500e-03 -7.100e-03 -3.200e-03]\n",
      " [-3.080e-02 -4.520e-02  5.580e-02 -7.900e-03 -2.600e-03]\n",
      " [-3.750e-02  1.378e-01  5.356e-01  3.230e-02 -4.100e-03]\n",
      " [-3.220e-02 -1.200e-03  1.619e-01  5.700e-03 -4.000e-04]]\n",
      "mean_state_value 0.024782050068393954\n",
      "episode 383/600\n",
      "p1 0.9072000000000002 p0 0.023199999999999932\n",
      "trajectorySteps 66\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [14 13  1  0  0]\n",
      " [36  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "⬇️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.220e-02 -2.500e-02 -1.580e-02 -5.500e-03 -7.500e-03]\n",
      " [-4.090e-02 -1.310e-02 -6.400e-03 -7.100e-03 -3.200e-03]\n",
      " [-3.090e-02 -4.520e-02  5.720e-02 -7.800e-03 -2.500e-03]\n",
      " [-3.750e-02  1.380e-01  5.379e-01  3.230e-02 -4.100e-03]\n",
      " [-3.200e-02 -1.100e-03  1.621e-01  5.700e-03 -4.000e-04]]\n",
      "mean_state_value 0.02499704518538236\n",
      "episode 384/600\n",
      "p1 0.9080000000000003 p0 0.02299999999999993\n",
      "trajectorySteps 8\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [3 0 0 0 0]\n",
      " [2 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "⬇️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.210e-02 -2.480e-02 -1.570e-02 -5.400e-03 -7.500e-03]\n",
      " [-4.070e-02 -1.300e-02 -6.400e-03 -7.000e-03 -3.100e-03]\n",
      " [-3.080e-02 -4.490e-02  5.730e-02 -7.800e-03 -2.500e-03]\n",
      " [-3.750e-02  1.394e-01  5.402e-01  3.240e-02 -4.100e-03]\n",
      " [-3.190e-02 -1.100e-03  1.623e-01  5.700e-03 -4.000e-04]]\n",
      "mean_state_value 0.025221203256082153\n",
      "episode 385/600\n",
      "p1 0.9088000000000003 p0 0.02279999999999993\n",
      "trajectorySteps 169\n",
      "[[ 1  1  1  1  0]\n",
      " [34  0  0  1  1]\n",
      " [62  0  0  0  1]\n",
      " [59  0  2  0  1]\n",
      " [ 1  0  1  1  1]]\n",
      "➡️➡️🔄⬇️⬇️\n",
      "🔄⏫️⏩️⬇️⬇️\n",
      "➡️🔄⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.210e-02 -2.460e-02 -1.560e-02 -5.400e-03 -7.400e-03]\n",
      " [-4.130e-02 -1.290e-02 -6.300e-03 -7.000e-03 -3.100e-03]\n",
      " [-3.100e-02 -4.460e-02  5.740e-02 -7.700e-03 -2.500e-03]\n",
      " [-3.730e-02  1.396e-01  5.426e-01  3.240e-02 -4.000e-03]\n",
      " [-3.170e-02 -1.100e-03  1.637e-01  5.900e-03 -4.000e-04]]\n",
      "mean_state_value 0.025411538450454567\n",
      "episode 386/600\n",
      "p1 0.9096000000000003 p0 0.022599999999999933\n",
      "trajectorySteps 49\n",
      "[[ 1  1  9  0  0]\n",
      " [ 8  0  1 11  1]\n",
      " [ 1  0  0  9  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "➡️🔄⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.200e-02 -2.450e-02 -1.570e-02 -5.300e-03 -7.400e-03]\n",
      " [-4.120e-02 -1.280e-02 -6.300e-03 -6.900e-03 -3.100e-03]\n",
      " [-3.090e-02 -4.430e-02  5.740e-02 -7.700e-03 -2.500e-03]\n",
      " [-3.710e-02  1.397e-01  5.449e-01  3.240e-02 -4.000e-03]\n",
      " [-3.160e-02 -1.000e-03  1.651e-01  6.000e-03 -4.000e-04]]\n",
      "mean_state_value 0.025631720945610836\n",
      "episode 387/600\n",
      "p1 0.9104000000000003 p0 0.02239999999999993\n",
      "trajectorySteps 22\n",
      "[[0 1 2 1 0]\n",
      " [0 1 0 1 1]\n",
      " [2 6 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "➡️🔄⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.200e-02 -2.430e-02 -1.560e-02 -5.300e-03 -7.300e-03]\n",
      " [-4.090e-02 -1.280e-02 -6.200e-03 -6.800e-03 -3.100e-03]\n",
      " [-3.090e-02 -4.430e-02  5.750e-02 -7.600e-03 -2.500e-03]\n",
      " [-3.690e-02  1.399e-01  5.472e-01  3.250e-02 -4.000e-03]\n",
      " [-3.140e-02 -1.000e-03  1.665e-01  6.200e-03 -4.000e-04]]\n",
      "mean_state_value 0.0258538013185969\n",
      "episode 388/600\n",
      "p1 0.9112000000000002 p0 0.02219999999999993\n",
      "trajectorySteps 5\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "➡️🔄⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.190e-02 -2.410e-02 -1.550e-02 -5.300e-03 -7.200e-03]\n",
      " [-4.070e-02 -1.270e-02 -6.200e-03 -6.800e-03 -3.100e-03]\n",
      " [-3.080e-02 -4.400e-02  5.760e-02 -7.600e-03 -2.500e-03]\n",
      " [-3.690e-02  1.413e-01  5.495e-01  3.250e-02 -3.900e-03]\n",
      " [-3.130e-02 -9.000e-04  1.666e-01  6.200e-03 -4.000e-04]]\n",
      "mean_state_value 0.026081390811004773\n",
      "episode 389/600\n",
      "p1 0.9120000000000003 p0 0.02199999999999993\n",
      "trajectorySteps 37\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 3 31  1  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.180e-02 -2.400e-02 -1.530e-02 -5.200e-03 -7.200e-03]\n",
      " [-4.040e-02 -1.260e-02 -6.100e-03 -6.700e-03 -3.100e-03]\n",
      " [-3.080e-02 -4.400e-02  5.900e-02 -7.500e-03 -2.500e-03]\n",
      " [-3.660e-02  1.415e-01  5.510e-01  3.250e-02 -3.900e-03]\n",
      " [-3.110e-02 -9.000e-04  1.668e-01  6.200e-03 -4.000e-04]]\n",
      "mean_state_value 0.026274699190168335\n",
      "episode 390/600\n",
      "p1 0.9128000000000003 p0 0.02179999999999993\n",
      "trajectorySteps 18\n",
      "[[0 1 1 1 0]\n",
      " [1 1 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 2]\n",
      " [0 0 2 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.180e-02 -2.380e-02 -1.520e-02 -5.200e-03 -7.100e-03]\n",
      " [-4.030e-02 -1.250e-02 -6.100e-03 -6.700e-03 -3.100e-03]\n",
      " [-3.070e-02 -4.370e-02  5.910e-02 -7.400e-03 -2.400e-03]\n",
      " [-3.640e-02  1.417e-01  5.534e-01  3.250e-02 -3.900e-03]\n",
      " [-3.100e-02 -9.000e-04  1.682e-01  6.400e-03 -4.000e-04]]\n",
      "mean_state_value 0.026505854761678948\n",
      "episode 391/600\n",
      "p1 0.9136000000000003 p0 0.021599999999999932\n",
      "trajectorySteps 31\n",
      "[[ 0  1  1  1  0]\n",
      " [ 0  1  0  1  1]\n",
      " [10  8  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.170e-02 -2.360e-02 -1.510e-02 -5.200e-03 -7.000e-03]\n",
      " [-4.010e-02 -1.250e-02 -6.000e-03 -6.600e-03 -3.000e-03]\n",
      " [-3.070e-02 -4.370e-02  5.920e-02 -7.400e-03 -2.400e-03]\n",
      " [-3.620e-02  1.419e-01  5.557e-01  3.260e-02 -3.900e-03]\n",
      " [-3.080e-02 -8.000e-04  1.696e-01  6.500e-03 -4.000e-04]]\n",
      "mean_state_value 0.026729761726670787\n",
      "episode 392/600\n",
      "p1 0.9144000000000003 p0 0.02139999999999993\n",
      "trajectorySteps 172\n",
      "[[  0   0   0   0   0]\n",
      " [ 20   0   0   0   0]\n",
      " [133   3   0   0   0]\n",
      " [  7   1   2   0   0]\n",
      " [  6   0   0   0   0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.160e-02 -2.350e-02 -1.500e-02 -5.100e-03 -7.000e-03]\n",
      " [-3.980e-02 -1.240e-02 -6.000e-03 -6.600e-03 -3.000e-03]\n",
      " [-3.090e-02 -4.340e-02  5.920e-02 -7.300e-03 -2.400e-03]\n",
      " [-3.620e-02  1.433e-01  5.581e-01  3.260e-02 -3.800e-03]\n",
      " [-3.070e-02 -8.000e-04  1.698e-01  6.600e-03 -4.000e-04]]\n",
      "mean_state_value 0.02694346729210928\n",
      "episode 393/600\n",
      "p1 0.9152000000000002 p0 0.02119999999999993\n",
      "trajectorySteps 41\n",
      "[[ 0  1  1  1  0]\n",
      " [ 1  1  0  1  1]\n",
      " [15 12  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.160e-02 -2.330e-02 -1.490e-02 -5.100e-03 -6.900e-03]\n",
      " [-3.960e-02 -1.230e-02 -5.900e-03 -6.500e-03 -3.000e-03]\n",
      " [-3.100e-02 -4.350e-02  5.930e-02 -7.200e-03 -2.400e-03]\n",
      " [-3.600e-02  1.435e-01  5.604e-01  3.260e-02 -3.800e-03]\n",
      " [-3.060e-02 -8.000e-04  1.712e-01  6.700e-03 -3.000e-04]]\n",
      "mean_state_value 0.027159887744091265\n",
      "episode 394/600\n",
      "p1 0.9160000000000003 p0 0.02099999999999993\n",
      "trajectorySteps 60\n",
      "[[ 0  1  1  1  0]\n",
      " [ 0  1  0  1  1]\n",
      " [23 24  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.150e-02 -2.310e-02 -1.470e-02 -5.100e-03 -6.900e-03]\n",
      " [-3.930e-02 -1.230e-02 -5.900e-03 -6.500e-03 -3.000e-03]\n",
      " [-3.110e-02 -4.380e-02  5.940e-02 -7.200e-03 -2.400e-03]\n",
      " [-3.580e-02  1.437e-01  5.628e-01  3.270e-02 -3.700e-03]\n",
      " [-3.040e-02 -7.000e-04  1.727e-01  6.900e-03 -3.000e-04]]\n",
      "mean_state_value 0.02737250898217595\n",
      "episode 395/600\n",
      "p1 0.9168000000000003 p0 0.02079999999999993\n",
      "trajectorySteps 30\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  1  0  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [21  1  1  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.140e-02 -2.300e-02 -1.460e-02 -5.000e-03 -6.800e-03]\n",
      " [-3.910e-02 -1.220e-02 -5.800e-03 -6.400e-03 -3.000e-03]\n",
      " [-3.100e-02 -4.350e-02  5.950e-02 -7.100e-03 -2.400e-03]\n",
      " [-3.560e-02  1.438e-01  5.651e-01  3.270e-02 -3.700e-03]\n",
      " [-3.060e-02 -5.000e-04  1.741e-01  6.900e-03 -3.000e-04]]\n",
      "mean_state_value 0.027600603330530046\n",
      "episode 396/600\n",
      "p1 0.9176000000000003 p0 0.02059999999999993\n",
      "trajectorySteps 48\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [22  1  2  0  0]\n",
      " [20  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.140e-02 -2.280e-02 -1.450e-02 -5.000e-03 -6.700e-03]\n",
      " [-3.880e-02 -1.210e-02 -5.800e-03 -6.400e-03 -3.000e-03]\n",
      " [-3.100e-02 -4.320e-02  5.950e-02 -7.000e-03 -2.400e-03]\n",
      " [-3.590e-02  1.453e-01  5.675e-01  3.270e-02 -3.700e-03]\n",
      " [-3.060e-02 -5.000e-04  1.743e-01  6.900e-03 -3.000e-04]]\n",
      "mean_state_value 0.027810983001520944\n",
      "episode 397/600\n",
      "p1 0.9184000000000003 p0 0.02039999999999993\n",
      "trajectorySteps 71\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [ 4  0  2  0  0]\n",
      " [61  1  1  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.130e-02 -2.260e-02 -1.440e-02 -5.000e-03 -6.700e-03]\n",
      " [-3.860e-02 -1.200e-02 -5.700e-03 -6.300e-03 -2.900e-03]\n",
      " [-3.090e-02 -4.280e-02  5.960e-02 -7.000e-03 -2.400e-03]\n",
      " [-3.570e-02  1.455e-01  5.698e-01  3.280e-02 -3.600e-03]\n",
      " [-3.100e-02 -3.000e-04  1.757e-01  7.000e-03 -3.000e-04]]\n",
      "mean_state_value 0.028031683801239084\n",
      "episode 398/600\n",
      "p1 0.9192000000000002 p0 0.02019999999999993\n",
      "trajectorySteps 154\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 4  0  0  0  0]\n",
      " [72  0  2  0  0]\n",
      " [74  1  1  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.120e-02 -2.240e-02 -1.430e-02 -4.900e-03 -6.600e-03]\n",
      " [-3.830e-02 -1.190e-02 -5.700e-03 -6.200e-03 -2.900e-03]\n",
      " [-3.090e-02 -4.250e-02  5.970e-02 -6.900e-03 -2.300e-03]\n",
      " [-3.570e-02  1.456e-01  5.722e-01  3.280e-02 -3.600e-03]\n",
      " [-3.170e-02 -1.000e-04  1.771e-01  7.000e-03 -3.000e-04]]\n",
      "mean_state_value 0.028231498053423666\n",
      "episode 399/600\n",
      "p1 0.9200000000000003 p0 0.019999999999999928\n",
      "trajectorySteps 38\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [14  0  2  0  0]\n",
      " [17  1  1  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.120e-02 -2.230e-02 -1.420e-02 -4.900e-03 -6.500e-03]\n",
      " [-3.810e-02 -1.180e-02 -5.600e-03 -6.200e-03 -2.900e-03]\n",
      " [-3.090e-02 -4.220e-02  5.980e-02 -6.800e-03 -2.300e-03]\n",
      " [-3.570e-02  1.458e-01  5.746e-01  3.280e-02 -3.600e-03]\n",
      " [-3.190e-02  1.000e-04  1.786e-01  7.000e-03 -3.000e-04]]\n",
      "mean_state_value 0.028447960670109525\n",
      "episode 400/600\n",
      "p1 0.9208000000000003 p0 0.01979999999999993\n",
      "trajectorySteps 84\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [50  1  2  0  0]\n",
      " [29  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.110e-02 -2.210e-02 -1.400e-02 -4.900e-03 -6.500e-03]\n",
      " [-3.780e-02 -1.180e-02 -5.600e-03 -6.100e-03 -2.900e-03]\n",
      " [-3.080e-02 -4.190e-02  5.980e-02 -6.800e-03 -2.300e-03]\n",
      " [-3.580e-02  1.473e-01  5.769e-01  3.290e-02 -3.500e-03]\n",
      " [-3.220e-02  1.000e-04  1.788e-01  7.000e-03 -3.000e-04]]\n",
      "mean_state_value 0.02866082397688582\n",
      "episode 401/600\n",
      "p1 0.9216000000000003 p0 0.01959999999999993\n",
      "trajectorySteps 73\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 7  4  0  0  0]\n",
      " [ 4  0  2  0  0]\n",
      " [53  2  1  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.100e-02 -2.190e-02 -1.390e-02 -4.800e-03 -6.400e-03]\n",
      " [-3.750e-02 -1.170e-02 -5.500e-03 -6.100e-03 -2.900e-03]\n",
      " [-3.080e-02 -4.160e-02  5.990e-02 -6.700e-03 -2.300e-03]\n",
      " [-3.560e-02  1.475e-01  5.793e-01  3.290e-02 -3.500e-03]\n",
      " [-3.230e-02  1.000e-04  1.802e-01  7.000e-03 -3.000e-04]]\n",
      "mean_state_value 0.028880032508014856\n",
      "episode 402/600\n",
      "p1 0.9224000000000003 p0 0.019399999999999928\n",
      "trajectorySteps 40\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [18  1  0  0  0]\n",
      " [17  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.100e-02 -2.170e-02 -1.380e-02 -4.800e-03 -6.400e-03]\n",
      " [-3.730e-02 -1.160e-02 -5.500e-03 -6.000e-03 -2.900e-03]\n",
      " [-3.090e-02 -4.130e-02  6.000e-02 -6.700e-03 -2.300e-03]\n",
      " [-3.580e-02  1.489e-01  5.817e-01  3.290e-02 -3.500e-03]\n",
      " [-3.220e-02  2.000e-04  1.804e-01  7.100e-03 -3.000e-04]]\n",
      "mean_state_value 0.029096970235243415\n",
      "episode 403/600\n",
      "p1 0.9232000000000002 p0 0.01919999999999993\n",
      "trajectorySteps 116\n",
      "[[ 0  0  0  0  0]\n",
      " [ 3  0  0  0  0]\n",
      " [55  2  0  0  0]\n",
      " [52  1  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.090e-02 -2.160e-02 -1.370e-02 -4.700e-03 -6.300e-03]\n",
      " [-3.710e-02 -1.150e-02 -5.400e-03 -6.000e-03 -2.800e-03]\n",
      " [-3.090e-02 -4.100e-02  6.010e-02 -6.600e-03 -2.300e-03]\n",
      " [-3.620e-02  1.504e-01  5.841e-01  3.290e-02 -3.400e-03]\n",
      " [-3.200e-02  2.000e-04  1.806e-01  7.100e-03 -3.000e-04]]\n",
      "mean_state_value 0.029305272888293734\n",
      "episode 404/600\n",
      "p1 0.9240000000000003 p0 0.018999999999999927\n",
      "trajectorySteps 40\n",
      "[[ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [27  1  0  0  0]\n",
      " [ 3  0  2  0  0]\n",
      " [ 3  1  1  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.080e-02 -2.140e-02 -1.360e-02 -4.700e-03 -6.200e-03]\n",
      " [-3.690e-02 -1.140e-02 -5.400e-03 -5.900e-03 -2.800e-03]\n",
      " [-3.090e-02 -4.070e-02  6.010e-02 -6.500e-03 -2.200e-03]\n",
      " [-3.600e-02  1.506e-01  5.865e-01  3.300e-02 -3.400e-03]\n",
      " [-3.210e-02  4.000e-04  1.820e-01  7.100e-03 -3.000e-04]]\n",
      "mean_state_value 0.029536601631294457\n",
      "episode 405/600\n",
      "p1 0.9248000000000003 p0 0.018799999999999928\n",
      "trajectorySteps 48\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [22 19  1  0  0]\n",
      " [ 2  0  2  0  0]\n",
      " [ 1  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄🔄⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.080e-02 -2.120e-02 -1.340e-02 -4.700e-03 -6.200e-03]\n",
      " [-3.660e-02 -1.130e-02 -5.300e-03 -5.900e-03 -2.800e-03]\n",
      " [-3.090e-02 -4.070e-02  6.160e-02 -6.500e-03 -2.200e-03]\n",
      " [-3.580e-02  1.508e-01  5.889e-01  3.300e-02 -3.400e-03]\n",
      " [-3.190e-02  5.000e-04  1.822e-01  7.100e-03 -3.000e-04]]\n",
      "mean_state_value 0.029767230333758547\n",
      "episode 406/600\n",
      "p1 0.9256000000000003 p0 0.01859999999999993\n",
      "trajectorySteps 52\n",
      "[[ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [12  0  0  0  0]\n",
      " [18  1  2  0  0]\n",
      " [18  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄🔄⏬⬆️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.070e-02 -2.100e-02 -1.330e-02 -4.600e-03 -6.100e-03]\n",
      " [-3.640e-02 -1.130e-02 -5.300e-03 -5.800e-03 -2.800e-03]\n",
      " [-3.090e-02 -4.040e-02  6.170e-02 -6.400e-03 -2.200e-03]\n",
      " [-3.590e-02  1.523e-01  5.913e-01  3.300e-02 -3.300e-03]\n",
      " [-3.210e-02  5.000e-04  1.824e-01  7.100e-03 -3.000e-04]]\n",
      "mean_state_value 0.02998327056936712\n",
      "episode 407/600\n",
      "p1 0.9264000000000003 p0 0.018399999999999927\n",
      "trajectorySteps 5\n",
      "[[0 0 0 0 0]\n",
      " [0 0 0 0 0]\n",
      " [1 0 0 0 0]\n",
      " [1 1 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄🔄⏬⬆️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.060e-02 -2.090e-02 -1.320e-02 -4.600e-03 -6.000e-03]\n",
      " [-3.610e-02 -1.120e-02 -5.200e-03 -5.700e-03 -2.800e-03]\n",
      " [-3.080e-02 -4.000e-02  6.180e-02 -6.300e-03 -2.200e-03]\n",
      " [-3.580e-02  1.538e-01  5.937e-01  3.310e-02 -3.300e-03]\n",
      " [-3.190e-02  5.000e-04  1.826e-01  7.200e-03 -3.000e-04]]\n",
      "mean_state_value 0.03021972813623214\n",
      "episode 408/600\n",
      "p1 0.9272000000000002 p0 0.018199999999999928\n",
      "trajectorySteps 29\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [24  2  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "🔄🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.060e-02 -2.070e-02 -1.310e-02 -4.600e-03 -6.000e-03]\n",
      " [-3.580e-02 -1.110e-02 -5.200e-03 -5.700e-03 -2.800e-03]\n",
      " [-3.080e-02 -3.970e-02  6.180e-02 -6.300e-03 -2.200e-03]\n",
      " [-3.580e-02  1.551e-01  5.961e-01  3.310e-02 -3.300e-03]\n",
      " [-3.180e-02  6.000e-04  1.827e-01  7.200e-03 -3.000e-04]]\n",
      "mean_state_value 0.03044453993794509\n",
      "episode 409/600\n",
      "p1 0.9280000000000003 p0 0.017999999999999926\n",
      "trajectorySteps 23\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  1  0  0  0]\n",
      " [ 1 18  1  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "🔄⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.050e-02 -2.050e-02 -1.300e-02 -4.500e-03 -5.900e-03]\n",
      " [-3.560e-02 -1.100e-02 -5.100e-03 -5.600e-03 -2.700e-03]\n",
      " [-3.070e-02 -4.000e-02  6.330e-02 -6.200e-03 -2.200e-03]\n",
      " [-3.560e-02  1.553e-01  5.985e-01  3.310e-02 -3.200e-03]\n",
      " [-3.160e-02  6.000e-04  1.829e-01  7.200e-03 -3.000e-04]]\n",
      "mean_state_value 0.0306683159969205\n",
      "episode 410/600\n",
      "p1 0.9288000000000003 p0 0.017799999999999927\n",
      "trajectorySteps 574\n",
      "[[  1   1   1   1   0]\n",
      " [ 11   0   0   1   1]\n",
      " [529   9   0   0   1]\n",
      " [ 12   0   2   0   1]\n",
      " [  0   0   1   1   1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.050e-02 -2.040e-02 -1.280e-02 -4.500e-03 -5.800e-03]\n",
      " [-3.550e-02 -1.090e-02 -5.100e-03 -5.600e-03 -2.700e-03]\n",
      " [-3.110e-02 -3.980e-02  6.340e-02 -6.100e-03 -2.200e-03]\n",
      " [-3.540e-02  1.554e-01  6.009e-01  3.320e-02 -3.200e-03]\n",
      " [-3.140e-02  7.000e-04  1.844e-01  7.400e-03 -3.000e-04]]\n",
      "mean_state_value 0.03088299053320914\n",
      "episode 411/600\n",
      "p1 0.9296000000000003 p0 0.017599999999999928\n",
      "trajectorySteps 37\n",
      "[[ 0  1  2  1  0]\n",
      " [ 1  1  0  1  1]\n",
      " [11 10  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  2  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "➡️🔄⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.040e-02 -2.020e-02 -1.270e-02 -4.500e-03 -5.800e-03]\n",
      " [-3.530e-02 -1.090e-02 -5.000e-03 -5.500e-03 -2.700e-03]\n",
      " [-3.120e-02 -3.970e-02  6.350e-02 -6.100e-03 -2.100e-03]\n",
      " [-3.520e-02  1.556e-01  6.033e-01  3.320e-02 -3.200e-03]\n",
      " [-3.130e-02  7.000e-04  1.859e-01  7.600e-03 -2.000e-04]]\n",
      "mean_state_value 0.031118244565596132\n",
      "episode 412/600\n",
      "p1 0.9304000000000003 p0 0.017399999999999926\n",
      "trajectorySteps 19\n",
      "[[0 1 1 1 0]\n",
      " [1 2 0 1 1]\n",
      " [2 2 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬇️🔄⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.030e-02 -2.000e-02 -1.260e-02 -4.400e-03 -5.700e-03]\n",
      " [-3.500e-02 -1.100e-02 -4.900e-03 -5.500e-03 -2.700e-03]\n",
      " [-3.110e-02 -3.960e-02  6.360e-02 -6.000e-03 -2.100e-03]\n",
      " [-3.490e-02  1.558e-01  6.058e-01  3.320e-02 -3.100e-03]\n",
      " [-3.110e-02  7.000e-04  1.873e-01  7.800e-03 -2.000e-04]]\n",
      "mean_state_value 0.03135259562055228\n",
      "episode 413/600\n",
      "p1 0.9312000000000002 p0 0.017199999999999927\n",
      "trajectorySteps 12\n",
      "[[0 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [2 0 0 0 0]\n",
      " [2 0 2 0 0]\n",
      " [2 1 1 0 0]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "➡️🔄⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.030e-02 -1.980e-02 -1.250e-02 -4.400e-03 -5.700e-03]\n",
      " [-3.480e-02 -1.090e-02 -4.900e-03 -5.400e-03 -2.700e-03]\n",
      " [-3.110e-02 -3.920e-02  6.370e-02 -5.900e-03 -2.100e-03]\n",
      " [-3.470e-02  1.560e-01  6.082e-01  3.330e-02 -3.100e-03]\n",
      " [-3.110e-02  1.000e-03  1.888e-01  7.800e-03 -2.000e-04]]\n",
      "mean_state_value 0.03159408301363194\n",
      "episode 414/600\n",
      "p1 0.9320000000000003 p0 0.016999999999999925\n",
      "trajectorySteps 23\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  1  0  0  0]\n",
      " [ 2 17  1  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-2.020e-02 -1.970e-02 -1.240e-02 -4.400e-03 -5.600e-03]\n",
      " [-3.450e-02 -1.080e-02 -4.800e-03 -5.400e-03 -2.700e-03]\n",
      " [-3.100e-02 -3.940e-02  6.520e-02 -5.900e-03 -2.100e-03]\n",
      " [-3.450e-02  1.562e-01  6.106e-01  3.330e-02 -3.000e-03]\n",
      " [-3.090e-02  1.000e-03  1.890e-01  7.800e-03 -2.000e-04]]\n",
      "mean_state_value 0.031825131056225954\n",
      "episode 415/600\n",
      "p1 0.9328000000000003 p0 0.016799999999999926\n",
      "trajectorySteps 257\n",
      "[[  0   0   0   0   0]\n",
      " [  0   0   0   0   0]\n",
      " [  3   0   0   0   0]\n",
      " [124   0   2   0   0]\n",
      " [126   1   1   0   0]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.010e-02 -1.950e-02 -1.230e-02 -4.300e-03 -5.500e-03]\n",
      " [-3.430e-02 -1.070e-02 -4.800e-03 -5.300e-03 -2.700e-03]\n",
      " [-3.100e-02 -3.900e-02  6.520e-02 -5.800e-03 -2.100e-03]\n",
      " [-3.440e-02  1.564e-01  6.131e-01  3.330e-02 -3.000e-03]\n",
      " [-3.130e-02  1.200e-03  1.905e-01  7.800e-03 -2.000e-04]]\n",
      "mean_state_value 0.032047023889040356\n",
      "episode 416/600\n",
      "p1 0.9336000000000003 p0 0.016599999999999927\n",
      "trajectorySteps 83\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [40 39  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬇️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.010e-02 -1.930e-02 -1.210e-02 -4.300e-03 -5.500e-03]\n",
      " [-3.400e-02 -1.060e-02 -4.700e-03 -5.300e-03 -2.600e-03]\n",
      " [-3.090e-02 -3.910e-02  6.530e-02 -5.700e-03 -2.100e-03]\n",
      " [-3.420e-02  1.579e-01  6.155e-01  3.330e-02 -3.000e-03]\n",
      " [-3.120e-02  1.200e-03  1.907e-01  7.800e-03 -2.000e-04]]\n",
      "mean_state_value 0.03227526270474378\n",
      "episode 417/600\n",
      "p1 0.9344000000000003 p0 0.016399999999999925\n",
      "trajectorySteps 92\n",
      "[[ 0  0  0  0  0]\n",
      " [ 2  0  0  0  0]\n",
      " [42  3  2  0  0]\n",
      " [41  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️🔄➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬⬆️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-2.000e-02 -1.910e-02 -1.200e-02 -4.200e-03 -5.400e-03]\n",
      " [-3.380e-02 -1.050e-02 -4.700e-03 -5.200e-03 -2.600e-03]\n",
      " [-3.090e-02 -3.900e-02  6.670e-02 -5.700e-03 -2.100e-03]\n",
      " [-3.420e-02  1.581e-01  6.179e-01  3.340e-02 -2.900e-03]\n",
      " [-3.100e-02  1.300e-03  1.909e-01  7.900e-03 -2.000e-04]]\n",
      "mean_state_value 0.03249861114162977\n",
      "episode 418/600\n",
      "p1 0.9352000000000003 p0 0.016199999999999926\n",
      "trajectorySteps 322\n",
      "[[ 2 66  1  1  0]\n",
      " [88  2  0  1  1]\n",
      " [89  0  0  0  1]\n",
      " [64  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏬⏩️➡️⬇️\n",
      "⬇️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-1.990e-02 -1.920e-02 -1.190e-02 -4.200e-03 -5.300e-03]\n",
      " [-3.380e-02 -1.050e-02 -4.600e-03 -5.100e-03 -2.600e-03]\n",
      " [-3.110e-02 -3.870e-02  6.680e-02 -5.600e-03 -2.000e-03]\n",
      " [-3.440e-02  1.582e-01  6.204e-01  3.340e-02 -2.900e-03]\n",
      " [-3.080e-02  1.300e-03  1.924e-01  8.000e-03 -2.000e-04]]\n",
      "mean_state_value 0.03270457581811514\n",
      "episode 419/600\n",
      "p1 0.9360000000000003 p0 0.015999999999999924\n",
      "trajectorySteps 29\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [13  2  1  0  0]\n",
      " [11  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏬⏩️➡️⬇️\n",
      "⬇️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-1.990e-02 -1.900e-02 -1.180e-02 -4.200e-03 -5.300e-03]\n",
      " [-3.350e-02 -1.040e-02 -4.600e-03 -5.100e-03 -2.600e-03]\n",
      " [-3.110e-02 -3.850e-02  6.830e-02 -5.500e-03 -2.000e-03]\n",
      " [-3.430e-02  1.584e-01  6.228e-01  3.340e-02 -2.900e-03]\n",
      " [-3.070e-02  1.400e-03  1.926e-01  8.100e-03 -2.000e-04]]\n",
      "mean_state_value 0.03293987457963639\n",
      "episode 420/600\n",
      "p1 0.9368000000000003 p0 0.015799999999999925\n",
      "trajectorySteps 22\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1 18  1  0  0]\n",
      " [ 0  0  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏬⏩️➡️⬇️\n",
      "⬇️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-1.980e-02 -1.880e-02 -1.170e-02 -4.100e-03 -5.200e-03]\n",
      " [-3.330e-02 -1.030e-02 -4.500e-03 -5.000e-03 -2.600e-03]\n",
      " [-3.110e-02 -3.870e-02  6.980e-02 -5.500e-03 -2.000e-03]\n",
      " [-3.410e-02  1.586e-01  6.253e-01  3.350e-02 -2.800e-03]\n",
      " [-3.050e-02  1.400e-03  1.928e-01  8.100e-03 -2.000e-04]]\n",
      "mean_state_value 0.0331724294708008\n",
      "episode 421/600\n",
      "p1 0.9376000000000003 p0 0.015599999999999925\n",
      "trajectorySteps 277\n",
      "[[ 0  0  0  0  0]\n",
      " [59  2  0  0  0]\n",
      " [75  2  0  0  0]\n",
      " [74  0  2  0  0]\n",
      " [61  1  1  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-1.970e-02 -1.860e-02 -1.150e-02 -4.100e-03 -5.200e-03]\n",
      " [-3.440e-02 -1.020e-02 -4.500e-03 -5.000e-03 -2.600e-03]\n",
      " [-3.110e-02 -3.840e-02  6.990e-02 -5.400e-03 -2.000e-03]\n",
      " [-3.440e-02  1.588e-01  6.278e-01  3.350e-02 -2.800e-03]\n",
      " [-3.120e-02  1.600e-03  1.943e-01  8.100e-03 -2.000e-04]]\n",
      "mean_state_value 0.03330600486720632\n",
      "episode 422/600\n",
      "p1 0.9384000000000003 p0 0.015399999999999924\n",
      "trajectorySteps 15\n",
      "[[0 1 1 1 0]\n",
      " [0 1 0 1 1]\n",
      " [1 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-1.970e-02 -1.850e-02 -1.140e-02 -4.100e-03 -5.100e-03]\n",
      " [-3.410e-02 -1.010e-02 -4.400e-03 -4.900e-03 -2.500e-03]\n",
      " [-3.110e-02 -3.820e-02  7.000e-02 -5.300e-03 -2.000e-03]\n",
      " [-3.420e-02  1.590e-01  6.302e-01  3.350e-02 -2.800e-03]\n",
      " [-3.100e-02  1.700e-03  1.958e-01  8.300e-03 -2.000e-04]]\n",
      "mean_state_value 0.03355418143990056\n",
      "episode 423/600\n",
      "p1 0.9392000000000003 p0 0.015199999999999925\n",
      "trajectorySteps 19\n",
      "[[0 1 1 1 0]\n",
      " [0 1 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [3 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "🔄⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-1.960e-02 -1.830e-02 -1.130e-02 -4.000e-03 -5.000e-03]\n",
      " [-3.380e-02 -1.010e-02 -4.400e-03 -4.900e-03 -2.500e-03]\n",
      " [-3.100e-02 -3.800e-02  7.010e-02 -5.300e-03 -2.000e-03]\n",
      " [-3.400e-02  1.592e-01  6.327e-01  3.360e-02 -2.700e-03]\n",
      " [-3.090e-02  1.700e-03  1.973e-01  8.500e-03 -2.000e-04]]\n",
      "mean_state_value 0.033801539200739784\n",
      "episode 424/600\n",
      "p1 0.9400000000000003 p0 0.014999999999999925\n",
      "trajectorySteps 63\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 1  0  0  0  0]\n",
      " [59  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-1.950e-02 -1.810e-02 -1.120e-02 -4.000e-03 -5.000e-03]\n",
      " [-3.360e-02 -1.000e-02 -4.300e-03 -4.800e-03 -2.500e-03]\n",
      " [-3.100e-02 -3.760e-02  7.020e-02 -5.200e-03 -2.000e-03]\n",
      " [-3.430e-02  1.607e-01  6.352e-01  3.360e-02 -2.700e-03]\n",
      " [-3.070e-02  1.700e-03  1.975e-01  8.500e-03 -2.000e-04]]\n",
      "mean_state_value 0.03403126447954898\n",
      "episode 425/600\n",
      "p1 0.9408000000000003 p0 0.014799999999999924\n",
      "trajectorySteps 40\n",
      "[[ 0  1  2  1  0]\n",
      " [ 0  1  1  1  1]\n",
      " [12 12  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  2  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "➡️🔄⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-1.950e-02 -1.790e-02 -1.120e-02 -4.000e-03 -4.900e-03]\n",
      " [-3.330e-02 -9.900e-03 -4.300e-03 -4.800e-03 -2.500e-03]\n",
      " [-3.100e-02 -3.750e-02  7.020e-02 -5.200e-03 -1.900e-03]\n",
      " [-3.400e-02  1.609e-01  6.376e-01  3.360e-02 -2.700e-03]\n",
      " [-3.050e-02  1.800e-03  1.990e-01  8.700e-03 -2.000e-04]]\n",
      "mean_state_value 0.03426788283169464\n",
      "episode 426/600\n",
      "p1 0.9416000000000003 p0 0.014599999999999924\n",
      "trajectorySteps 30\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [ 2 25  0  0  0]\n",
      " [ 0  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-1.940e-02 -1.780e-02 -1.110e-02 -3.900e-03 -4.800e-03]\n",
      " [-3.300e-02 -9.800e-03 -4.200e-03 -4.700e-03 -2.500e-03]\n",
      " [-3.100e-02 -3.740e-02  7.030e-02 -5.100e-03 -1.900e-03]\n",
      " [-3.380e-02  1.625e-01  6.401e-01  3.360e-02 -2.600e-03]\n",
      " [-3.040e-02  1.800e-03  1.992e-01  8.700e-03 -1.000e-04]]\n",
      "mean_state_value 0.034508243373808464\n",
      "episode 427/600\n",
      "p1 0.9424000000000003 p0 0.014399999999999923\n",
      "trajectorySteps 28\n",
      "[[0 1 1 1 0]\n",
      " [0 1 0 1 1]\n",
      " [8 7 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-1.930e-02 -1.760e-02 -1.100e-02 -3.900e-03 -4.800e-03]\n",
      " [-3.280e-02 -9.800e-03 -4.200e-03 -4.700e-03 -2.500e-03]\n",
      " [-3.100e-02 -3.730e-02  7.040e-02 -5.000e-03 -1.900e-03]\n",
      " [-3.360e-02  1.627e-01  6.426e-01  3.370e-02 -2.600e-03]\n",
      " [-3.020e-02  1.900e-03  2.007e-01  8.900e-03 -1.000e-04]]\n",
      "mean_state_value 0.03475285366510096\n",
      "episode 428/600\n",
      "p1 0.9432000000000003 p0 0.014199999999999924\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 0]\n",
      " [3 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-1.930e-02 -1.740e-02 -1.090e-02 -3.900e-03 -4.700e-03]\n",
      " [-3.250e-02 -9.700e-03 -4.100e-03 -4.600e-03 -2.400e-03]\n",
      " [-3.090e-02 -3.690e-02  7.050e-02 -5.000e-03 -1.900e-03]\n",
      " [-3.330e-02  1.628e-01  6.451e-01  3.370e-02 -2.600e-03]\n",
      " [-3.000e-02  1.900e-03  2.023e-01  9.100e-03 -1.000e-04]]\n",
      "mean_state_value 0.03500865375900344\n",
      "episode 429/600\n",
      "p1 0.9440000000000003 p0 0.013999999999999924\n",
      "trajectorySteps 20\n",
      "[[1 1 1 1 0]\n",
      " [4 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-1.920e-02 -1.730e-02 -1.070e-02 -3.800e-03 -4.700e-03]\n",
      " [-3.230e-02 -9.600e-03 -4.100e-03 -4.500e-03 -2.400e-03]\n",
      " [-3.090e-02 -3.660e-02  7.060e-02 -4.900e-03 -1.900e-03]\n",
      " [-3.310e-02  1.630e-01  6.476e-01  3.370e-02 -2.500e-03]\n",
      " [-2.980e-02  1.900e-03  2.038e-01  9.300e-03 -1.000e-04]]\n",
      "mean_state_value 0.035263691063791736\n",
      "episode 430/600\n",
      "p1 0.9448000000000003 p0 0.013799999999999923\n",
      "trajectorySteps 40\n",
      "[[ 1  1  1  1  0]\n",
      " [25  0  0  1  1]\n",
      " [ 2  0  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-1.910e-02 -1.710e-02 -1.060e-02 -3.800e-03 -4.600e-03]\n",
      " [-3.220e-02 -9.500e-03 -4.000e-03 -4.500e-03 -2.400e-03]\n",
      " [-3.080e-02 -3.630e-02  7.070e-02 -4.800e-03 -1.900e-03]\n",
      " [-3.280e-02  1.632e-01  6.501e-01  3.380e-02 -2.500e-03]\n",
      " [-2.970e-02  2.000e-03  2.054e-01  9.500e-03 -1.000e-04]]\n",
      "mean_state_value 0.03551374189209519\n",
      "episode 431/600\n",
      "p1 0.9456000000000003 p0 0.013599999999999923\n",
      "trajectorySteps 20\n",
      "[[3 1 1 2 0]\n",
      " [2 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-1.910e-02 -1.690e-02 -1.050e-02 -3.700e-03 -4.500e-03]\n",
      " [-3.200e-02 -9.400e-03 -4.000e-03 -4.400e-03 -2.400e-03]\n",
      " [-3.080e-02 -3.590e-02  7.080e-02 -4.800e-03 -1.900e-03]\n",
      " [-3.260e-02  1.634e-01  6.515e-01  3.380e-02 -2.500e-03]\n",
      " [-2.950e-02  2.000e-03  2.069e-01  9.800e-03 -1.000e-04]]\n",
      "mean_state_value 0.03572963386394851\n",
      "episode 432/600\n",
      "p1 0.9464000000000004 p0 0.013399999999999922\n",
      "trajectorySteps 31\n",
      "[[0 1 2 1 0]\n",
      " [1 1 0 1 1]\n",
      " [8 7 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 2 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬆️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-1.900e-02 -1.670e-02 -1.040e-02 -3.700e-03 -4.500e-03]\n",
      " [-3.170e-02 -9.300e-03 -3.900e-03 -4.400e-03 -2.400e-03]\n",
      " [-3.070e-02 -3.580e-02  7.080e-02 -4.700e-03 -1.800e-03]\n",
      " [-3.230e-02  1.636e-01  6.541e-01  3.380e-02 -2.400e-03]\n",
      " [-2.930e-02  2.100e-03  2.085e-01  9.900e-03 -1.000e-04]]\n",
      "mean_state_value 0.03597762039342591\n",
      "episode 433/600\n",
      "p1 0.9472000000000003 p0 0.013199999999999924\n",
      "trajectorySteps 22\n",
      "[[1 1 1 1 0]\n",
      " [4 0 0 1 1]\n",
      " [5 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬇️⏬⏩️➡️⬇️\n",
      "⬆️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-1.900e-02 -1.660e-02 -1.030e-02 -3.700e-03 -4.400e-03]\n",
      " [-3.150e-02 -9.300e-03 -3.900e-03 -4.300e-03 -2.400e-03]\n",
      " [-3.070e-02 -3.550e-02  7.090e-02 -4.600e-03 -1.800e-03]\n",
      " [-3.210e-02  1.638e-01  6.566e-01  3.390e-02 -2.400e-03]\n",
      " [-2.920e-02  2.100e-03  2.100e-01  1.020e-02 -1.000e-04]]\n",
      "mean_state_value 0.036234689839251176\n",
      "episode 434/600\n",
      "p1 0.9480000000000003 p0 0.012999999999999923\n",
      "trajectorySteps 63\n",
      "[[ 1  1  1  1  0]\n",
      " [23  1  0  1  1]\n",
      " [24  2  0  0  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "🔄⏩️⬆️⬅️⬅️\n",
      "[[-0.0189 -0.0164 -0.0101 -0.0036 -0.0043]\n",
      " [-0.0314 -0.0092 -0.0038 -0.0043 -0.0023]\n",
      " [-0.0307 -0.0351  0.071  -0.0046 -0.0018]\n",
      " [-0.0319  0.164   0.6591  0.0339 -0.0024]\n",
      " [-0.029   0.0021  0.2116  0.0104 -0.    ]]\n",
      "mean_state_value 0.03648445508435119\n",
      "episode 435/600\n",
      "p1 0.9488000000000003 p0 0.012799999999999923\n",
      "trajectorySteps 275\n",
      "[[  0   0   0   0   0]\n",
      " [  0   0   0   0   0]\n",
      " [  1   0   0   0   0]\n",
      " [  7   0   2   0   0]\n",
      " [263   1   1   0   0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "➡️⬅️⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0188 -0.0162 -0.01   -0.0036 -0.0043]\n",
      " [-0.0312 -0.0091 -0.0038 -0.0042 -0.0023]\n",
      " [-0.0306 -0.0348  0.0711 -0.0045 -0.0018]\n",
      " [-0.0316  0.1641  0.6616  0.0339 -0.0023]\n",
      " [-0.03    0.0024  0.2131  0.0104 -0.    ]]\n",
      "mean_state_value 0.036696450898697386\n",
      "episode 436/600\n",
      "p1 0.9496000000000003 p0 0.012599999999999922\n",
      "trajectorySteps 35\n",
      "[[ 0  0  0  0  0]\n",
      " [ 0  0  0  0  0]\n",
      " [16 15  0  0  0]\n",
      " [ 1  1  2  0  0]\n",
      " [ 0  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0188 -0.016  -0.0099 -0.0036 -0.0042]\n",
      " [-0.0309 -0.009  -0.0037 -0.0042 -0.0023]\n",
      " [-0.0306 -0.0347  0.0712 -0.0044 -0.0018]\n",
      " [-0.0314  0.1657  0.6641  0.034  -0.0023]\n",
      " [-0.0299  0.0024  0.2133  0.0104 -0.    ]]\n",
      "mean_state_value 0.03693655442986021\n",
      "episode 437/600\n",
      "p1 0.9504000000000004 p0 0.012399999999999922\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬⬆️🔄\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0187 -0.0159 -0.0098 -0.0035 -0.0042]\n",
      " [-0.0306 -0.0089 -0.0037 -0.0041 -0.0023]\n",
      " [-0.0305 -0.0344  0.0713 -0.0044 -0.0018]\n",
      " [-0.0312  0.1659  0.6666  0.034  -0.0022]\n",
      " [-0.0297  0.0025  0.2149  0.0106 -0.    ]]\n",
      "mean_state_value 0.037197151015826345\n",
      "episode 438/600\n",
      "p1 0.9512000000000003 p0 0.012199999999999923\n",
      "trajectorySteps 42\n",
      "[[ 1  1  1  1  0]\n",
      " [ 1  0  0  1  2]\n",
      " [ 2  0  0  0 25]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  2]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0186 -0.0157 -0.0097 -0.0035 -0.0041]\n",
      " [-0.0304 -0.0088 -0.0036 -0.0041 -0.0023]\n",
      " [-0.0305 -0.034   0.0714 -0.0043 -0.0018]\n",
      " [-0.0309  0.1661  0.6692  0.034  -0.0022]\n",
      " [-0.0295  0.0025  0.2165  0.0108 -0.    ]]\n",
      "mean_state_value 0.0374576974263038\n",
      "episode 439/600\n",
      "p1 0.9520000000000003 p0 0.011999999999999922\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0186 -0.0155 -0.0095 -0.0035 -0.004 ]\n",
      " [-0.0301 -0.0087 -0.0036 -0.004  -0.0023]\n",
      " [-0.0304 -0.0337  0.0714 -0.0042 -0.0018]\n",
      " [-0.0307  0.1663  0.6717  0.034  -0.0022]\n",
      " [-0.0293  0.0025  0.218   0.0111 -0.    ]]\n",
      "mean_state_value 0.037719637243748945\n",
      "episode 440/600\n",
      "p1 0.9528000000000003 p0 0.011799999999999922\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0185 -0.0154 -0.0094 -0.0034 -0.004 ]\n",
      " [-0.0298 -0.0087 -0.0035 -0.004  -0.0023]\n",
      " [-0.0303 -0.0333  0.0715 -0.0042 -0.0017]\n",
      " [-0.0305  0.1665  0.6742  0.0341 -0.0021]\n",
      " [-0.0292  0.0026  0.2196  0.0113  0.    ]]\n",
      "mean_state_value 0.037981659436408216\n",
      "episode 441/600\n",
      "p1 0.9536000000000003 p0 0.011599999999999921\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0184 -0.0152 -0.0093 -0.0034 -0.0039]\n",
      " [-0.0296 -0.0086 -0.0035 -0.0039 -0.0022]\n",
      " [-0.0303 -0.033   0.0716 -0.0041 -0.0017]\n",
      " [-0.0302  0.1667  0.6768  0.0341 -0.0021]\n",
      " [-0.029   0.0026  0.2212  0.0115  0.    ]]\n",
      "mean_state_value 0.03824441305527005\n",
      "episode 442/600\n",
      "p1 0.9544000000000004 p0 0.01139999999999992\n",
      "trajectorySteps 18\n",
      "[[2 1 1 1 0]\n",
      " [1 0 0 1 2]\n",
      " [1 0 0 0 2]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-0.0184 -0.015  -0.0092 -0.0034 -0.0038]\n",
      " [-0.0293 -0.0085 -0.0034 -0.0038 -0.0022]\n",
      " [-0.0302 -0.0326  0.0717 -0.004  -0.0017]\n",
      " [-0.03    0.1669  0.6793  0.0341 -0.0021]\n",
      " [-0.0288  0.0027  0.2228  0.0117  0.    ]]\n",
      "mean_state_value 0.03850717913596983\n",
      "episode 443/600\n",
      "p1 0.9552000000000003 p0 0.011199999999999922\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.830e-02 -1.480e-02 -9.100e-03 -3.300e-03 -3.800e-03]\n",
      " [-2.900e-02 -8.400e-03 -3.300e-03 -3.800e-03 -2.200e-03]\n",
      " [-3.020e-02 -3.230e-02  7.180e-02 -4.000e-03 -1.700e-03]\n",
      " [-2.970e-02  1.671e-01  6.809e-01  3.420e-02 -2.000e-03]\n",
      " [-2.870e-02  2.700e-03  2.244e-01  1.190e-02  1.000e-04]]\n",
      "mean_state_value 0.03872997888745307\n",
      "episode 444/600\n",
      "p1 0.9560000000000003 p0 0.010999999999999921\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.830e-02 -1.470e-02 -8.900e-03 -3.300e-03 -3.700e-03]\n",
      " [-2.880e-02 -8.300e-03 -3.300e-03 -3.700e-03 -2.200e-03]\n",
      " [-3.010e-02 -3.190e-02  7.190e-02 -3.900e-03 -1.700e-03]\n",
      " [-2.950e-02  1.672e-01  6.834e-01  3.420e-02 -2.000e-03]\n",
      " [-2.850e-02  2.800e-03  2.259e-01  1.220e-02  1.000e-04]]\n",
      "mean_state_value 0.03899344140515594\n",
      "episode 445/600\n",
      "p1 0.9568000000000003 p0 0.01079999999999992\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.820e-02 -1.450e-02 -8.800e-03 -3.300e-03 -3.600e-03]\n",
      " [-2.850e-02 -8.200e-03 -3.200e-03 -3.700e-03 -2.200e-03]\n",
      " [-3.000e-02 -3.160e-02  7.190e-02 -3.800e-03 -1.700e-03]\n",
      " [-2.930e-02  1.674e-01  6.860e-01  3.420e-02 -2.000e-03]\n",
      " [-2.830e-02  2.800e-03  2.275e-01  1.240e-02  1.000e-04]]\n",
      "mean_state_value 0.03925783401768545\n",
      "episode 446/600\n",
      "p1 0.9576000000000003 p0 0.01059999999999992\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 1 2 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.810e-02 -1.430e-02 -8.700e-03 -3.200e-03 -3.600e-03]\n",
      " [-2.830e-02 -8.100e-03 -3.200e-03 -3.600e-03 -2.200e-03]\n",
      " [-3.000e-02 -3.120e-02  7.200e-02 -3.800e-03 -1.700e-03]\n",
      " [-2.900e-02  1.676e-01  6.885e-01  3.430e-02 -1.900e-03]\n",
      " [-2.810e-02  3.000e-03  2.290e-01  1.260e-02  1.000e-04]]\n",
      "mean_state_value 0.039526415247691994\n",
      "episode 447/600\n",
      "p1 0.9584000000000004 p0 0.01039999999999992\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 0]\n",
      " [2 1 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.810e-02 -1.420e-02 -8.600e-03 -3.200e-03 -3.500e-03]\n",
      " [-2.810e-02 -8.100e-03 -3.100e-03 -3.600e-03 -2.100e-03]\n",
      " [-2.990e-02 -3.090e-02  7.210e-02 -3.700e-03 -1.600e-03]\n",
      " [-2.880e-02  1.678e-01  6.911e-01  3.430e-02 -1.900e-03]\n",
      " [-2.800e-02  3.100e-03  2.306e-01  1.280e-02  1.000e-04]]\n",
      "mean_state_value 0.03978753310955545\n",
      "episode 448/600\n",
      "p1 0.9592000000000003 p0 0.010199999999999921\n",
      "trajectorySteps 20\n",
      "[[2 1 1 1 0]\n",
      " [1 0 0 1 3]\n",
      " [2 0 0 0 2]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.800e-02 -1.400e-02 -8.500e-03 -3.200e-03 -3.500e-03]\n",
      " [-2.780e-02 -8.000e-03 -3.100e-03 -3.500e-03 -2.100e-03]\n",
      " [-2.980e-02 -3.050e-02  7.220e-02 -3.600e-03 -1.600e-03]\n",
      " [-2.860e-02  1.680e-01  6.937e-01  3.430e-02 -1.900e-03]\n",
      " [-2.780e-02  3.100e-03  2.322e-01  1.310e-02  1.000e-04]]\n",
      "mean_state_value 0.0400525653455363\n",
      "episode 449/600\n",
      "p1 0.9600000000000003 p0 0.00999999999999992\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 1 1]\n",
      " [0 0 0 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.790e-02 -1.380e-02 -8.300e-03 -3.100e-03 -3.400e-03]\n",
      " [-2.760e-02 -7.900e-03 -3.000e-03 -3.500e-03 -2.100e-03]\n",
      " [-2.980e-02 -3.020e-02  7.230e-02 -3.600e-03 -1.600e-03]\n",
      " [-2.830e-02  1.682e-01  6.963e-01  3.590e-02 -1.800e-03]\n",
      " [-2.760e-02  3.200e-03  2.324e-01  1.300e-02  2.000e-04]]\n",
      "mean_state_value 0.040314417802856745\n",
      "episode 450/600\n",
      "p1 0.9608000000000003 p0 0.00979999999999992\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️🔄⏬⬆️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.790e-02 -1.360e-02 -8.200e-03 -3.100e-03 -3.300e-03]\n",
      " [-2.730e-02 -7.800e-03 -3.000e-03 -3.400e-03 -2.100e-03]\n",
      " [-2.970e-02 -2.980e-02  7.240e-02 -3.500e-03 -1.600e-03]\n",
      " [-2.810e-02  1.684e-01  6.989e-01  3.600e-02 -1.800e-03]\n",
      " [-2.750e-02  3.200e-03  2.341e-01  1.320e-02  2.000e-04]]\n",
      "mean_state_value 0.04058113871935806\n",
      "episode 451/600\n",
      "p1 0.9616000000000003 p0 0.00959999999999992\n",
      "trajectorySteps 21\n",
      "[[1 1 1 1 0]\n",
      " [1 0 1 5 0]\n",
      " [2 0 1 4 0]\n",
      " [1 0 2 0 0]\n",
      " [0 0 0 0 0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.780e-02 -1.350e-02 -8.100e-03 -3.100e-03 -3.300e-03]\n",
      " [-2.700e-02 -7.700e-03 -2.900e-03 -3.400e-03 -2.100e-03]\n",
      " [-2.970e-02 -2.950e-02  7.400e-02 -3.600e-03 -1.600e-03]\n",
      " [-2.780e-02  1.686e-01  7.014e-01  3.600e-02 -1.800e-03]\n",
      " [-2.730e-02  3.300e-03  2.343e-01  1.330e-02  2.000e-04]]\n",
      "mean_state_value 0.04083773714093967\n",
      "episode 452/600\n",
      "p1 0.9624000000000004 p0 0.009399999999999919\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.770e-02 -1.330e-02 -8.000e-03 -3.000e-03 -3.200e-03]\n",
      " [-2.680e-02 -7.600e-03 -2.900e-03 -3.400e-03 -2.100e-03]\n",
      " [-2.960e-02 -2.910e-02  7.410e-02 -3.500e-03 -1.600e-03]\n",
      " [-2.760e-02  1.687e-01  7.040e-01  3.600e-02 -1.700e-03]\n",
      " [-2.710e-02  3.300e-03  2.359e-01  1.350e-02  2.000e-04]]\n",
      "mean_state_value 0.041105408200278394\n",
      "episode 453/600\n",
      "p1 0.9632000000000003 p0 0.00919999999999992\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.770e-02 -1.310e-02 -7.900e-03 -3.000e-03 -3.100e-03]\n",
      " [-2.650e-02 -7.500e-03 -2.800e-03 -3.300e-03 -2.100e-03]\n",
      " [-2.950e-02 -2.880e-02  7.420e-02 -3.400e-03 -1.600e-03]\n",
      " [-2.740e-02  1.689e-01  7.066e-01  3.610e-02 -1.700e-03]\n",
      " [-2.690e-02  3.300e-03  2.375e-01  1.370e-02  2.000e-04]]\n",
      "mean_state_value 0.04137349523968416\n",
      "episode 454/600\n",
      "p1 0.9640000000000003 p0 0.00899999999999992\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.760e-02 -1.290e-02 -7.700e-03 -3.000e-03 -3.100e-03]\n",
      " [-2.620e-02 -7.500e-03 -2.800e-03 -3.300e-03 -2.000e-03]\n",
      " [-2.950e-02 -2.840e-02  7.430e-02 -3.400e-03 -1.500e-03]\n",
      " [-2.710e-02  1.691e-01  7.092e-01  3.610e-02 -1.700e-03]\n",
      " [-2.680e-02  3.400e-03  2.391e-01  1.400e-02  2.000e-04]]\n",
      "mean_state_value 0.04164199867783628\n",
      "episode 455/600\n",
      "p1 0.9648000000000003 p0 0.008799999999999919\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.750e-02 -1.280e-02 -7.600e-03 -2.900e-03 -3.000e-03]\n",
      " [-2.600e-02 -7.400e-03 -2.700e-03 -3.200e-03 -2.000e-03]\n",
      " [-2.940e-02 -2.810e-02  7.440e-02 -3.300e-03 -1.500e-03]\n",
      " [-2.690e-02  1.693e-01  7.118e-01  3.610e-02 -1.600e-03]\n",
      " [-2.660e-02  3.400e-03  2.408e-01  1.420e-02  2.000e-04]]\n",
      "mean_state_value 0.041910336807511196\n",
      "episode 456/600\n",
      "p1 0.9656000000000003 p0 0.008599999999999918\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 1 2 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.740e-02 -1.260e-02 -7.500e-03 -2.900e-03 -3.000e-03]\n",
      " [-2.570e-02 -7.300e-03 -2.700e-03 -3.200e-03 -2.000e-03]\n",
      " [-2.930e-02 -2.770e-02  7.440e-02 -3.200e-03 -1.500e-03]\n",
      " [-2.660e-02  1.695e-01  7.144e-01  3.620e-02 -1.600e-03]\n",
      " [-2.640e-02  3.700e-03  2.423e-01  1.450e-02  3.000e-04]]\n",
      "mean_state_value 0.042185353167878636\n",
      "episode 457/600\n",
      "p1 0.9664000000000004 p0 0.008399999999999918\n",
      "trajectorySteps 18\n",
      "[[2 1 1 1 0]\n",
      " [2 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.740e-02 -1.240e-02 -7.400e-03 -2.800e-03 -2.900e-03]\n",
      " [-2.540e-02 -7.200e-03 -2.600e-03 -3.100e-03 -2.000e-03]\n",
      " [-2.930e-02 -2.740e-02  7.450e-02 -3.200e-03 -1.500e-03]\n",
      " [-2.640e-02  1.697e-01  7.170e-01  3.620e-02 -1.600e-03]\n",
      " [-2.630e-02  3.700e-03  2.439e-01  1.470e-02  3.000e-04]]\n",
      "mean_state_value 0.04245508876588322\n",
      "episode 458/600\n",
      "p1 0.9672000000000003 p0 0.00819999999999992\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "🔄⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬➡️🔄\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.730e-02 -1.220e-02 -7.300e-03 -2.800e-03 -2.800e-03]\n",
      " [-2.520e-02 -7.100e-03 -2.600e-03 -3.100e-03 -2.000e-03]\n",
      " [-2.920e-02 -2.700e-02  7.460e-02 -3.100e-03 -1.500e-03]\n",
      " [-2.610e-02  1.699e-01  7.197e-01  3.620e-02 -1.500e-03]\n",
      " [-2.610e-02  3.800e-03  2.456e-01  1.500e-02  3.000e-04]]\n",
      "mean_state_value 0.04272517951385534\n",
      "episode 459/600\n",
      "p1 0.9680000000000003 p0 0.007999999999999919\n",
      "trajectorySteps 130\n",
      "[[ 0  1  1  1  0]\n",
      " [57  1  0  1  1]\n",
      " [ 3  0  0  0 58]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.720e-02 -1.210e-02 -7.100e-03 -2.800e-03 -2.800e-03]\n",
      " [-2.500e-02 -7.000e-03 -2.500e-03 -3.000e-03 -2.000e-03]\n",
      " [-2.910e-02 -2.670e-02  7.470e-02 -3.000e-03 -1.500e-03]\n",
      " [-2.590e-02  1.701e-01  7.223e-01  3.630e-02 -1.500e-03]\n",
      " [-2.590e-02  3.800e-03  2.472e-01  1.520e-02  3.000e-04]]\n",
      "mean_state_value 0.04299227782852773\n",
      "episode 460/600\n",
      "p1 0.9688000000000003 p0 0.007799999999999918\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 2]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️🔄⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.720e-02 -1.190e-02 -7.000e-03 -2.700e-03 -2.700e-03]\n",
      " [-2.470e-02 -6.900e-03 -2.500e-03 -2.900e-03 -1.900e-03]\n",
      " [-2.910e-02 -2.630e-02  7.480e-02 -2.900e-03 -1.500e-03]\n",
      " [-2.570e-02  1.702e-01  7.249e-01  3.630e-02 -1.400e-03]\n",
      " [-2.570e-02  3.800e-03  2.489e-01  1.550e-02  3.000e-04]]\n",
      "mean_state_value 0.04326323782527832\n",
      "episode 461/600\n",
      "p1 0.9696000000000004 p0 0.007599999999999918\n",
      "trajectorySteps 44\n",
      "[[ 0  1  1  1  0]\n",
      " [ 0  1  0  1  0]\n",
      " [ 1 30  0  1  1]\n",
      " [ 0  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.710e-02 -1.170e-02 -6.900e-03 -2.700e-03 -2.600e-03]\n",
      " [-2.440e-02 -6.900e-03 -2.400e-03 -2.900e-03 -1.900e-03]\n",
      " [-2.900e-02 -2.610e-02  7.490e-02 -2.900e-03 -1.500e-03]\n",
      " [-2.540e-02  1.704e-01  7.275e-01  3.630e-02 -1.400e-03]\n",
      " [-2.560e-02  3.900e-03  2.505e-01  1.570e-02  4.000e-04]]\n",
      "mean_state_value 0.04352968752218832\n",
      "episode 462/600\n",
      "p1 0.9704000000000004 p0 0.007399999999999918\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.700e-02 -1.150e-02 -6.800e-03 -2.700e-03 -2.600e-03]\n",
      " [-2.420e-02 -6.800e-03 -2.400e-03 -2.800e-03 -1.900e-03]\n",
      " [-2.890e-02 -2.580e-02  7.500e-02 -2.800e-03 -1.400e-03]\n",
      " [-2.520e-02  1.706e-01  7.302e-01  3.640e-02 -1.400e-03]\n",
      " [-2.540e-02  3.900e-03  2.522e-01  1.600e-02  4.000e-04]]\n",
      "mean_state_value 0.043801211995321235\n",
      "episode 463/600\n",
      "p1 0.9712000000000003 p0 0.007199999999999917\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 2]\n",
      " [0 0 1 1 2]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.700e-02 -1.140e-02 -6.700e-03 -2.600e-03 -2.500e-03]\n",
      " [-2.390e-02 -6.700e-03 -2.300e-03 -2.800e-03 -1.900e-03]\n",
      " [-2.890e-02 -2.540e-02  7.510e-02 -2.800e-03 -1.400e-03]\n",
      " [-2.490e-02  1.708e-01  7.328e-01  3.640e-02 -1.300e-03]\n",
      " [-2.520e-02  4.000e-03  2.538e-01  1.620e-02  4.000e-04]]\n",
      "mean_state_value 0.04407360397607255\n",
      "episode 464/600\n",
      "p1 0.9720000000000003 p0 0.006999999999999918\n",
      "trajectorySteps 20\n",
      "[[2 2 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.690e-02 -1.120e-02 -6.500e-03 -2.600e-03 -2.500e-03]\n",
      " [-2.360e-02 -6.600e-03 -2.300e-03 -2.700e-03 -1.900e-03]\n",
      " [-2.880e-02 -2.510e-02  7.510e-02 -2.700e-03 -1.400e-03]\n",
      " [-2.470e-02  1.710e-01  7.355e-01  3.640e-02 -1.300e-03]\n",
      " [-2.510e-02  4.000e-03  2.555e-01  1.650e-02  4.000e-04]]\n",
      "mean_state_value 0.04434672781445224\n",
      "episode 465/600\n",
      "p1 0.9728000000000003 p0 0.006799999999999917\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 1 2 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬⬆️⬇️\n",
      "⬇️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.680e-02 -1.100e-02 -6.400e-03 -2.600e-03 -2.400e-03]\n",
      " [-2.340e-02 -6.500e-03 -2.200e-03 -2.700e-03 -1.900e-03]\n",
      " [-2.870e-02 -2.470e-02  7.520e-02 -2.600e-03 -1.400e-03]\n",
      " [-2.440e-02  1.712e-01  7.381e-01  3.650e-02 -1.300e-03]\n",
      " [-2.490e-02  4.300e-03  2.571e-01  1.680e-02  4.000e-04]]\n",
      "mean_state_value 0.044627171373736264\n",
      "episode 466/600\n",
      "p1 0.9736000000000004 p0 0.006599999999999917\n",
      "trajectorySteps 211\n",
      "[[ 1  1  1  1  0]\n",
      " [ 1  0  0 34  0]\n",
      " [ 2  0  1 35  0]\n",
      " [66  0  2  0  0]\n",
      " [66  0  0  0  0]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.680e-02 -1.080e-02 -6.300e-03 -2.500e-03 -2.300e-03]\n",
      " [-2.310e-02 -6.400e-03 -2.200e-03 -2.600e-03 -1.800e-03]\n",
      " [-2.870e-02 -2.440e-02  7.690e-02 -2.600e-03 -1.400e-03]\n",
      " [-2.420e-02  1.714e-01  7.408e-01  3.650e-02 -1.200e-03]\n",
      " [-2.500e-02  4.300e-03  2.573e-01  1.680e-02  4.000e-04]]\n",
      "mean_state_value 0.04488182212758776\n",
      "episode 467/600\n",
      "p1 0.9744000000000004 p0 0.006399999999999917\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️🔄⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.670e-02 -1.070e-02 -6.200e-03 -2.500e-03 -2.300e-03]\n",
      " [-2.280e-02 -6.400e-03 -2.100e-03 -2.500e-03 -1.800e-03]\n",
      " [-2.860e-02 -2.400e-02  7.700e-02 -2.500e-03 -1.400e-03]\n",
      " [-2.400e-02  1.716e-01  7.434e-01  3.650e-02 -1.200e-03]\n",
      " [-2.490e-02  4.400e-03  2.590e-01  1.700e-02  5.000e-04]]\n",
      "mean_state_value 0.045156261572321894\n",
      "episode 468/600\n",
      "p1 0.9752000000000003 p0 0.0061999999999999165\n",
      "trajectorySteps 73\n",
      "[[ 1  1  1  2  0]\n",
      " [ 1  0  0 56  1]\n",
      " [ 2  0  0  0  1]\n",
      " [ 1  0  2  0  1]\n",
      " [ 0  0  1  1  1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.660e-02 -1.050e-02 -6.100e-03 -2.500e-03 -2.200e-03]\n",
      " [-2.250e-02 -6.300e-03 -2.100e-03 -2.500e-03 -1.800e-03]\n",
      " [-2.850e-02 -2.360e-02  7.710e-02 -2.500e-03 -1.300e-03]\n",
      " [-2.380e-02  1.718e-01  7.461e-01  3.660e-02 -1.200e-03]\n",
      " [-2.470e-02  4.400e-03  2.607e-01  1.730e-02  5.000e-04]]\n",
      "mean_state_value 0.04543058788927776\n",
      "episode 469/600\n",
      "p1 0.9760000000000003 p0 0.005999999999999917\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.650e-02 -1.030e-02 -5.900e-03 -2.400e-03 -2.100e-03]\n",
      " [-2.230e-02 -6.200e-03 -2.000e-03 -2.400e-03 -1.800e-03]\n",
      " [-2.850e-02 -2.330e-02  7.720e-02 -2.400e-03 -1.300e-03]\n",
      " [-2.350e-02  1.719e-01  7.487e-01  3.660e-02 -1.100e-03]\n",
      " [-2.450e-02  4.500e-03  2.624e-01  1.760e-02  5.000e-04]]\n",
      "mean_state_value 0.04570528544519933\n",
      "episode 470/600\n",
      "p1 0.9768000000000003 p0 0.005799999999999916\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.650e-02 -1.010e-02 -5.800e-03 -2.400e-03 -2.100e-03]\n",
      " [-2.200e-02 -6.100e-03 -2.000e-03 -2.400e-03 -1.800e-03]\n",
      " [-2.840e-02 -2.300e-02  7.730e-02 -2.300e-03 -1.300e-03]\n",
      " [-2.330e-02  1.721e-01  7.504e-01  3.660e-02 -1.100e-03]\n",
      " [-2.430e-02  4.500e-03  2.640e-01  1.780e-02  5.000e-04]]\n",
      "mean_state_value 0.04594022719231768\n",
      "episode 471/600\n",
      "p1 0.9776000000000004 p0 0.005599999999999916\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.640e-02 -1.000e-02 -5.700e-03 -2.300e-03 -2.000e-03]\n",
      " [-2.170e-02 -6.000e-03 -1.900e-03 -2.300e-03 -1.800e-03]\n",
      " [-2.830e-02 -2.260e-02  7.740e-02 -2.300e-03 -1.300e-03]\n",
      " [-2.300e-02  1.723e-01  7.521e-01  3.660e-02 -1.100e-03]\n",
      " [-2.420e-02  4.500e-03  2.657e-01  1.810e-02  6.000e-04]]\n",
      "mean_state_value 0.046175652727182076\n",
      "episode 472/600\n",
      "p1 0.9784000000000004 p0 0.005399999999999916\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 2]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.630e-02 -9.800e-03 -5.600e-03 -2.300e-03 -2.000e-03]\n",
      " [-2.150e-02 -5.900e-03 -1.800e-03 -2.300e-03 -1.700e-03]\n",
      " [-2.830e-02 -2.230e-02  7.750e-02 -2.200e-03 -1.300e-03]\n",
      " [-2.280e-02  1.725e-01  7.548e-01  3.670e-02 -1.000e-03]\n",
      " [-2.400e-02  4.600e-03  2.674e-01  1.840e-02  6.000e-04]]\n",
      "mean_state_value 0.046452063629062755\n",
      "episode 473/600\n",
      "p1 0.9792000000000003 p0 0.005199999999999916\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.630e-02 -9.600e-03 -5.400e-03 -2.300e-03 -1.900e-03]\n",
      " [-2.120e-02 -5.800e-03 -1.800e-03 -2.200e-03 -1.700e-03]\n",
      " [-2.820e-02 -2.190e-02  7.750e-02 -2.100e-03 -1.300e-03]\n",
      " [-2.260e-02  1.727e-01  7.574e-01  3.670e-02 -1.000e-03]\n",
      " [-2.380e-02  4.600e-03  2.691e-01  1.870e-02  6.000e-04]]\n",
      "mean_state_value 0.046728662101597464\n",
      "episode 474/600\n",
      "p1 0.9800000000000003 p0 0.004999999999999916\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.620e-02 -9.400e-03 -5.300e-03 -2.200e-03 -1.800e-03]\n",
      " [-2.090e-02 -5.800e-03 -1.700e-03 -2.200e-03 -1.700e-03]\n",
      " [-2.810e-02 -2.160e-02  7.760e-02 -2.100e-03 -1.300e-03]\n",
      " [-2.230e-02  1.729e-01  7.601e-01  3.670e-02 -9.000e-04]\n",
      " [-2.370e-02  4.700e-03  2.708e-01  1.890e-02  6.000e-04]]\n",
      "mean_state_value 0.0470060164185165\n",
      "episode 475/600\n",
      "p1 0.9808000000000003 p0 0.0047999999999999154\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.610e-02 -9.300e-03 -5.200e-03 -2.200e-03 -1.800e-03]\n",
      " [-2.070e-02 -5.700e-03 -1.700e-03 -2.100e-03 -1.700e-03]\n",
      " [-2.810e-02 -2.120e-02  7.770e-02 -2.000e-03 -1.200e-03]\n",
      " [-2.210e-02  1.731e-01  7.628e-01  3.680e-02 -9.000e-04]\n",
      " [-2.350e-02  4.700e-03  2.725e-01  1.920e-02  6.000e-04]]\n",
      "mean_state_value 0.047283819423913756\n",
      "episode 476/600\n",
      "p1 0.9816000000000004 p0 0.004599999999999915\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.600e-02 -9.100e-03 -5.100e-03 -2.200e-03 -1.700e-03]\n",
      " [-2.040e-02 -5.600e-03 -1.600e-03 -2.000e-03 -1.700e-03]\n",
      " [-2.800e-02 -2.090e-02  7.780e-02 -1.900e-03 -1.200e-03]\n",
      " [-2.180e-02  1.733e-01  7.655e-01  3.680e-02 -9.000e-04]\n",
      " [-2.330e-02  4.800e-03  2.742e-01  1.950e-02  7.000e-04]]\n",
      "mean_state_value 0.04756219662479846\n",
      "episode 477/600\n",
      "p1 0.9824000000000004 p0 0.004399999999999915\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.600e-02 -8.900e-03 -5.000e-03 -2.100e-03 -1.600e-03]\n",
      " [-2.010e-02 -5.500e-03 -1.600e-03 -2.000e-03 -1.700e-03]\n",
      " [-2.790e-02 -2.050e-02  7.790e-02 -1.900e-03 -1.200e-03]\n",
      " [-2.160e-02  1.734e-01  7.682e-01  3.680e-02 -8.000e-04]\n",
      " [-2.310e-02  4.800e-03  2.759e-01  1.980e-02  7.000e-04]]\n",
      "mean_state_value 0.04784043057959868\n",
      "episode 478/600\n",
      "p1 0.9832000000000003 p0 0.004199999999999915\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.590e-02 -8.700e-03 -4.800e-03 -2.100e-03 -1.600e-03]\n",
      " [-1.980e-02 -5.400e-03 -1.500e-03 -1.900e-03 -1.600e-03]\n",
      " [-2.780e-02 -2.020e-02  7.800e-02 -1.800e-03 -1.200e-03]\n",
      " [-2.140e-02  1.736e-01  7.709e-01  3.690e-02 -8.000e-04]\n",
      " [-2.300e-02  4.800e-03  2.776e-01  2.000e-02  7.000e-04]]\n",
      "mean_state_value 0.04811968768706727\n",
      "episode 479/600\n",
      "p1 0.9840000000000003 p0 0.003999999999999915\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.580e-02 -8.600e-03 -4.700e-03 -2.100e-03 -1.500e-03]\n",
      " [-1.960e-02 -5.300e-03 -1.500e-03 -1.900e-03 -1.600e-03]\n",
      " [-2.780e-02 -1.980e-02  7.810e-02 -1.700e-03 -1.200e-03]\n",
      " [-2.110e-02  1.738e-01  7.737e-01  3.690e-02 -8.000e-04]\n",
      " [-2.280e-02  4.900e-03  2.793e-01  2.030e-02  7.000e-04]]\n",
      "mean_state_value 0.04839938648650386\n",
      "episode 480/600\n",
      "p1 0.9848000000000003 p0 0.0037999999999999146\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.570e-02 -8.400e-03 -4.600e-03 -2.000e-03 -1.400e-03]\n",
      " [-1.930e-02 -5.200e-03 -1.400e-03 -1.800e-03 -1.600e-03]\n",
      " [-2.770e-02 -1.950e-02  7.820e-02 -1.600e-03 -1.200e-03]\n",
      " [-2.090e-02  1.740e-01  7.764e-01  3.690e-02 -7.000e-04]\n",
      " [-2.260e-02  4.900e-03  2.811e-01  2.060e-02  8.000e-04]]\n",
      "mean_state_value 0.0486794251939104\n",
      "episode 481/600\n",
      "p1 0.9856000000000004 p0 0.0035999999999999145\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.570e-02 -8.200e-03 -4.500e-03 -2.000e-03 -1.400e-03]\n",
      " [-1.900e-02 -5.200e-03 -1.400e-03 -1.800e-03 -1.600e-03]\n",
      " [-2.760e-02 -1.910e-02  7.820e-02 -1.600e-03 -1.200e-03]\n",
      " [-2.060e-02  1.742e-01  7.791e-01  3.700e-02 -7.000e-04]\n",
      " [-2.250e-02  5.000e-03  2.828e-01  2.090e-02  8.000e-04]]\n",
      "mean_state_value 0.04896001593309629\n",
      "episode 482/600\n",
      "p1 0.9864000000000004 p0 0.0033999999999999144\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.560e-02 -8.000e-03 -4.400e-03 -2.000e-03 -1.300e-03]\n",
      " [-1.880e-02 -5.100e-03 -1.300e-03 -1.700e-03 -1.600e-03]\n",
      " [-2.750e-02 -1.880e-02  7.830e-02 -1.500e-03 -1.100e-03]\n",
      " [-2.040e-02  1.744e-01  7.818e-01  3.700e-02 -7.000e-04]\n",
      " [-2.230e-02  5.000e-03  2.845e-01  2.120e-02  8.000e-04]]\n",
      "mean_state_value 0.04924048068657217\n",
      "episode 483/600\n",
      "p1 0.9872000000000003 p0 0.003199999999999914\n",
      "trajectorySteps 18\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [3 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.550e-02 -7.900e-03 -4.200e-03 -1.900e-03 -1.300e-03]\n",
      " [-1.850e-02 -5.000e-03 -1.300e-03 -1.600e-03 -1.600e-03]\n",
      " [-2.750e-02 -1.840e-02  7.840e-02 -1.400e-03 -1.100e-03]\n",
      " [-2.020e-02  1.746e-01  7.846e-01  3.700e-02 -6.000e-04]\n",
      " [-2.210e-02  5.000e-03  2.862e-01  2.150e-02  8.000e-04]]\n",
      "mean_state_value 0.049521530751143066\n",
      "episode 484/600\n",
      "p1 0.9880000000000003 p0 0.0029999999999999138\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.550e-02 -7.700e-03 -4.100e-03 -1.900e-03 -1.200e-03]\n",
      " [-1.820e-02 -4.900e-03 -1.200e-03 -1.600e-03 -1.500e-03]\n",
      " [-2.740e-02 -1.810e-02  7.850e-02 -1.400e-03 -1.100e-03]\n",
      " [-1.990e-02  1.748e-01  7.873e-01  3.710e-02 -6.000e-04]\n",
      " [-2.190e-02  5.100e-03  2.880e-01  2.180e-02  9.000e-04]]\n",
      "mean_state_value 0.04980345785660879\n",
      "episode 485/600\n",
      "p1 0.9888000000000003 p0 0.0027999999999999137\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.540e-02 -7.500e-03 -4.000e-03 -1.800e-03 -1.100e-03]\n",
      " [-1.800e-02 -4.800e-03 -1.200e-03 -1.500e-03 -1.500e-03]\n",
      " [-2.730e-02 -1.770e-02  7.860e-02 -1.300e-03 -1.100e-03]\n",
      " [-1.970e-02  1.749e-01  7.900e-01  3.710e-02 -6.000e-04]\n",
      " [-2.180e-02  5.100e-03  2.897e-01  2.210e-02  9.000e-04]]\n",
      "mean_state_value 0.05008575714758443\n",
      "episode 486/600\n",
      "p1 0.9896000000000004 p0 0.0025999999999999136\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.530e-02 -7.300e-03 -3.900e-03 -1.800e-03 -1.100e-03]\n",
      " [-1.770e-02 -4.700e-03 -1.100e-03 -1.500e-03 -1.500e-03]\n",
      " [-2.730e-02 -1.740e-02  7.870e-02 -1.200e-03 -1.100e-03]\n",
      " [-1.940e-02  1.751e-01  7.928e-01  3.710e-02 -5.000e-04]\n",
      " [-2.160e-02  5.200e-03  2.914e-01  2.240e-02  9.000e-04]]\n",
      "mean_state_value 0.05036816356485541\n",
      "episode 487/600\n",
      "p1 0.9904000000000004 p0 0.0023999999999999135\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.520e-02 -7.100e-03 -3.800e-03 -1.800e-03 -1.000e-03]\n",
      " [-1.740e-02 -4.600e-03 -1.100e-03 -1.400e-03 -1.500e-03]\n",
      " [-2.720e-02 -1.700e-02  7.880e-02 -1.200e-03 -1.100e-03]\n",
      " [-1.920e-02  1.753e-01  7.955e-01  3.720e-02 -5.000e-04]\n",
      " [-2.140e-02  5.200e-03  2.932e-01  2.270e-02  9.000e-04]]\n",
      "mean_state_value 0.05065137749869611\n",
      "episode 488/600\n",
      "p1 0.9912000000000003 p0 0.002199999999999913\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.520e-02 -7.000e-03 -3.600e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.710e-02 -4.600e-03 -1.000e-03 -1.400e-03 -1.500e-03]\n",
      " [-2.710e-02 -1.670e-02  7.890e-02 -1.100e-03 -1.000e-03]\n",
      " [-1.900e-02  1.755e-01  7.983e-01  3.720e-02 -4.000e-04]\n",
      " [-2.130e-02  5.200e-03  2.949e-01  2.300e-02  1.000e-03]]\n",
      "mean_state_value 0.05093455674020147\n",
      "episode 489/600\n",
      "p1 0.9920000000000003 p0 0.001999999999999913\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.510e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.700e-02 -1.630e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.870e-02  1.757e-01  8.010e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  2.967e-01  2.330e-02  1.000e-03]]\n",
      "mean_state_value 0.051218328454705225\n",
      "episode 490/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.510e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.700e-02 -1.630e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.870e-02  1.757e-01  8.030e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  2.982e-01  2.350e-02  1.000e-03]]\n",
      "mean_state_value 0.05136820503394637\n",
      "episode 491/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.510e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.700e-02 -1.630e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.870e-02  1.757e-01  8.049e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  2.996e-01  2.380e-02  1.000e-03]]\n",
      "mean_state_value 0.051518153905405414\n",
      "episode 492/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.510e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.700e-02 -1.630e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.870e-02  1.757e-01  8.069e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.011e-01  2.410e-02  1.100e-03]]\n",
      "mean_state_value 0.05166817506549037\n",
      "episode 493/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 1]\n",
      " [1 0 0 1 2]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.510e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.700e-02 -1.630e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.870e-02  1.757e-01  8.088e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.026e-01  2.440e-02  1.100e-03]]\n",
      "mean_state_value 0.051817670377433435\n",
      "episode 494/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.500e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.700e-02 -1.630e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.870e-02  1.757e-01  8.108e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.041e-01  2.460e-02  1.100e-03]]\n",
      "mean_state_value 0.0519678361042379\n",
      "episode 495/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.500e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.700e-02 -1.630e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.870e-02  1.757e-01  8.128e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.056e-01  2.490e-02  1.100e-03]]\n",
      "mean_state_value 0.05211766204014098\n",
      "episode 496/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.500e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.700e-02 -1.630e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.870e-02  1.757e-01  8.147e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.071e-01  2.520e-02  1.200e-03]]\n",
      "mean_state_value 0.05226791842162777\n",
      "episode 497/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 2]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.500e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.700e-02 -1.630e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.870e-02  1.757e-01  8.167e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.085e-01  2.550e-02  1.200e-03]]\n",
      "mean_state_value 0.05241822705854321\n",
      "episode 498/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.500e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.690e-02 -1.630e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.870e-02  1.757e-01  8.186e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.100e-01  2.570e-02  1.200e-03]]\n",
      "mean_state_value 0.05256868180257979\n",
      "episode 499/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.500e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.690e-02 -1.630e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.870e-02  1.757e-01  8.206e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.115e-01  2.600e-02  1.200e-03]]\n",
      "mean_state_value 0.05271879859577787\n",
      "episode 500/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.500e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.690e-02 -1.630e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.870e-02  1.757e-01  8.225e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.130e-01  2.630e-02  1.300e-03]]\n",
      "mean_state_value 0.052869397946781194\n",
      "episode 501/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.500e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.690e-02 -1.630e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.870e-02  1.757e-01  8.245e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.145e-01  2.660e-02  1.300e-03]]\n",
      "mean_state_value 0.05302001563056945\n",
      "episode 502/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.500e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.690e-02 -1.630e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.870e-02  1.757e-01  8.264e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.160e-01  2.690e-02  1.300e-03]]\n",
      "mean_state_value 0.053170759491834946\n",
      "episode 503/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.500e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.690e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.870e-02  1.757e-01  8.284e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.175e-01  2.710e-02  1.300e-03]]\n",
      "mean_state_value 0.05332102843685162\n",
      "episode 504/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 2 2]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.500e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.690e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.303e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.190e-01  2.740e-02  1.400e-03]]\n",
      "mean_state_value 0.053472531544620505\n",
      "episode 505/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.500e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.690e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.323e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.205e-01  2.770e-02  1.400e-03]]\n",
      "mean_state_value 0.053623492126250005\n",
      "episode 506/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.500e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.690e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.342e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.219e-01  2.800e-02  1.400e-03]]\n",
      "mean_state_value 0.053774524859058564\n",
      "episode 507/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.490e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.690e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.362e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.234e-01  2.830e-02  1.500e-03]]\n",
      "mean_state_value 0.053925629826548416\n",
      "episode 508/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.490e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.690e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.381e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.249e-01  2.860e-02  1.500e-03]]\n",
      "mean_state_value 0.05407640091840253\n",
      "episode 509/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.490e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.680e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.401e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.264e-01  2.890e-02  1.500e-03]]\n",
      "mean_state_value 0.05422765034445199\n",
      "episode 510/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.490e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.680e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.420e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.279e-01  2.920e-02  1.500e-03]]\n",
      "mean_state_value 0.054378972085122365\n",
      "episode 511/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.490e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.680e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.440e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.294e-01  2.950e-02  1.600e-03]]\n",
      "mean_state_value 0.05452996136137671\n",
      "episode 512/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.490e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.680e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.459e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.309e-01  2.980e-02  1.600e-03]]\n",
      "mean_state_value 0.05468102362685459\n",
      "episode 513/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 2 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.490e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.680e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.479e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.324e-01  3.010e-02  1.600e-03]]\n",
      "mean_state_value 0.054832496337419635\n",
      "episode 514/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.490e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.680e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.498e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.339e-01  3.040e-02  1.600e-03]]\n",
      "mean_state_value 0.054984053005890446\n",
      "episode 515/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.490e-02 -6.800e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.680e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.518e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.354e-01  3.070e-02  1.700e-03]]\n",
      "mean_state_value 0.05513519350243496\n",
      "episode 516/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.490e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.680e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.537e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.369e-01  3.100e-02  1.700e-03]]\n",
      "mean_state_value 0.05528640710970317\n",
      "episode 517/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.490e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.680e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.557e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.384e-01  3.130e-02  1.700e-03]]\n",
      "mean_state_value 0.055438234257016245\n",
      "episode 518/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.490e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.680e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.576e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.399e-01  3.160e-02  1.800e-03]]\n",
      "mean_state_value 0.05558973223560386\n",
      "episode 519/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.490e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.680e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.596e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.414e-01  3.190e-02  1.800e-03]]\n",
      "mean_state_value 0.055741164067111534\n",
      "episode 520/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[0 1 1 1 0]\n",
      " [1 1 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.490e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.500e-03]\n",
      " [-2.670e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.615e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.429e-01  3.220e-02  1.800e-03]]\n",
      "mean_state_value 0.055891708362031035\n",
      "episode 521/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.480e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.670e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.635e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.444e-01  3.250e-02  1.900e-03]]\n",
      "mean_state_value 0.056043769773001004\n",
      "episode 522/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.480e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.670e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.654e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.459e-01  3.280e-02  1.900e-03]]\n",
      "mean_state_value 0.05619595718196818\n",
      "episode 523/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.480e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.670e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.674e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.474e-01  3.310e-02  1.900e-03]]\n",
      "mean_state_value 0.05634816300696819\n",
      "episode 524/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.480e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.670e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.693e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.489e-01  3.340e-02  1.900e-03]]\n",
      "mean_state_value 0.05650049466854746\n",
      "episode 525/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.480e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.670e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.713e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.503e-01  3.370e-02  2.000e-03]]\n",
      "mean_state_value 0.05665250047488555\n",
      "episode 526/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.480e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.670e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.732e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.518e-01  3.400e-02  2.000e-03]]\n",
      "mean_state_value 0.05680457920800212\n",
      "episode 527/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.480e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.670e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.752e-01  3.720e-02 -4.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.533e-01  3.430e-02  2.000e-03]]\n",
      "mean_state_value 0.05695712747267852\n",
      "episode 528/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.480e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.670e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.771e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.548e-01  3.460e-02  2.100e-03]]\n",
      "mean_state_value 0.05710935164886198\n",
      "episode 529/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.480e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.670e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.791e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.563e-01  3.500e-02  2.100e-03]]\n",
      "mean_state_value 0.05726164873797059\n",
      "episode 530/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.480e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.670e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.880e-02  1.757e-01  8.810e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.578e-01  3.530e-02  2.100e-03]]\n",
      "mean_state_value 0.057414413368091395\n",
      "episode 531/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.480e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.660e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  8.830e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.593e-01  3.560e-02  2.200e-03]]\n",
      "mean_state_value 0.057566855874343216\n",
      "episode 532/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.480e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.660e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  8.849e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.608e-01  3.590e-02  2.200e-03]]\n",
      "mean_state_value 0.05771937127967558\n",
      "episode 533/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.480e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.660e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  8.869e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.624e-01  3.620e-02  2.200e-03]]\n",
      "mean_state_value 0.05787229874362228\n",
      "episode 534/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.480e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.660e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  8.888e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.639e-01  3.660e-02  2.300e-03]]\n",
      "mean_state_value 0.05802529839972541\n",
      "episode 535/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.470e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.660e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  8.908e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.654e-01  3.690e-02  2.300e-03]]\n",
      "mean_state_value 0.05817789024841479\n",
      "episode 536/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.470e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.660e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  8.927e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.669e-01  3.720e-02  2.300e-03]]\n",
      "mean_state_value 0.058330555119355115\n",
      "episode 537/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.470e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.660e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  8.947e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.684e-01  3.750e-02  2.400e-03]]\n",
      "mean_state_value 0.058483824715027645\n",
      "episode 538/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.470e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.660e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  8.966e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.699e-01  3.780e-02  2.400e-03]]\n",
      "mean_state_value 0.058637166434172626\n",
      "episode 539/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.470e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.660e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  8.986e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.714e-01  3.820e-02  2.400e-03]]\n",
      "mean_state_value 0.05879052678277296\n",
      "episode 540/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.470e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.660e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.005e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.729e-01  3.850e-02  2.500e-03]]\n",
      "mean_state_value 0.05894362232145787\n",
      "episode 541/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.470e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.660e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.025e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.744e-01  3.880e-02  2.500e-03]]\n",
      "mean_state_value 0.059097126953737984\n",
      "episode 542/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.470e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.660e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.044e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.759e-01  3.920e-02  2.500e-03]]\n",
      "mean_state_value 0.05925036776434154\n",
      "episode 543/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.470e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.650e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.063e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.774e-01  3.950e-02  2.600e-03]]\n",
      "mean_state_value 0.059403681425237435\n",
      "episode 544/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.470e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.650e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.083e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.789e-01  3.980e-02  2.600e-03]]\n",
      "mean_state_value 0.05955740242914229\n",
      "episode 545/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.470e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.650e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.102e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.804e-01  4.020e-02  2.700e-03]]\n",
      "mean_state_value 0.05971124891127969\n",
      "episode 546/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.470e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.650e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.122e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.819e-01  4.050e-02  2.700e-03]]\n",
      "mean_state_value 0.059865167603601346\n",
      "episode 547/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.470e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.650e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.141e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.834e-01  4.080e-02  2.700e-03]]\n",
      "mean_state_value 0.06001915838741971\n",
      "episode 548/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.460e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.650e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.161e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.849e-01  4.120e-02  2.800e-03]]\n",
      "mean_state_value 0.06017322114225046\n",
      "episode 549/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.460e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.650e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.180e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.864e-01  4.150e-02  2.800e-03]]\n",
      "mean_state_value 0.060327302657488355\n",
      "episode 550/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [1 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.460e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.650e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.200e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.879e-01  4.180e-02  2.800e-03]]\n",
      "mean_state_value 0.06048150957600362\n",
      "episode 551/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.460e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.300e-03 -1.400e-03]\n",
      " [-2.650e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.219e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.895e-01  4.220e-02  2.900e-03]]\n",
      "mean_state_value 0.060635788689817775\n",
      "episode 552/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.460e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.650e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.239e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.910e-01  4.250e-02  2.900e-03]]\n",
      "mean_state_value 0.060790088799719234\n",
      "episode 553/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.460e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.650e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.258e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.925e-01  4.290e-02  3.000e-03]]\n",
      "mean_state_value 0.06094411303078945\n",
      "episode 554/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.460e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.640e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.278e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.940e-01  4.320e-02  3.000e-03]]\n",
      "mean_state_value 0.061098100958024884\n",
      "episode 555/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [1 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.460e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.640e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.297e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.955e-01  4.360e-02  3.000e-03]]\n",
      "mean_state_value 0.06125264714074628\n",
      "episode 556/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.460e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.640e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.317e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.970e-01  4.390e-02  3.100e-03]]\n",
      "mean_state_value 0.061407265566211545\n",
      "episode 557/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 2]\n",
      " [1 0 2 0 2]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.460e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.640e-02 -1.640e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.336e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  3.985e-01  4.430e-02  3.100e-03]]\n",
      "mean_state_value 0.06156164169538983\n",
      "episode 558/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.460e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.640e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.356e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.000e-01  4.460e-02  3.200e-03]]\n",
      "mean_state_value 0.06171588003330153\n",
      "episode 559/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 19\n",
      "[[2 1 1 1 0]\n",
      " [2 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.460e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.640e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.375e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.015e-01  4.500e-02  3.200e-03]]\n",
      "mean_state_value 0.0618703237654155\n",
      "episode 560/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.460e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.640e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.395e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.031e-01  4.530e-02  3.200e-03]]\n",
      "mean_state_value 0.06202517960908983\n",
      "episode 561/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.460e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.640e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.414e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.046e-01  4.570e-02  3.300e-03]]\n",
      "mean_state_value 0.06218014901637394\n",
      "episode 562/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.450e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.640e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.434e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.061e-01  4.600e-02  3.300e-03]]\n",
      "mean_state_value 0.06233481887857046\n",
      "episode 563/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.450e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.640e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.453e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.076e-01  4.640e-02  3.400e-03]]\n",
      "mean_state_value 0.062489419585479135\n",
      "episode 564/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.450e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.640e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.472e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.091e-01  4.670e-02  3.400e-03]]\n",
      "mean_state_value 0.06264423993107818\n",
      "episode 565/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.450e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.640e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.492e-01  3.720e-02 -3.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.106e-01  4.710e-02  3.500e-03]]\n",
      "mean_state_value 0.06279902722628082\n",
      "episode 566/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [1 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.450e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.630e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.511e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.121e-01  4.740e-02  3.500e-03]]\n",
      "mean_state_value 0.06295436585301545\n",
      "episode 567/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.450e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.630e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.531e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.137e-01  4.780e-02  3.500e-03]]\n",
      "mean_state_value 0.06310977662813289\n",
      "episode 568/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [1 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.450e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.630e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.550e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.152e-01  4.810e-02  3.600e-03]]\n",
      "mean_state_value 0.06326526753645076\n",
      "episode 569/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.450e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.630e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.570e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.167e-01  4.850e-02  3.600e-03]]\n",
      "mean_state_value 0.06342080748260905\n",
      "episode 570/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [1 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.450e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.630e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.589e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.182e-01  4.890e-02  3.700e-03]]\n",
      "mean_state_value 0.06357643416189732\n",
      "episode 571/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.450e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.630e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.890e-02  1.757e-01  9.609e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.197e-01  4.920e-02  3.700e-03]]\n",
      "mean_state_value 0.06373213474849385\n",
      "episode 572/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.450e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.630e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.628e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.212e-01  4.960e-02  3.800e-03]]\n",
      "mean_state_value 0.06388757376604251\n",
      "episode 573/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.450e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.630e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.648e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.228e-01  5.000e-02  3.800e-03]]\n",
      "mean_state_value 0.06404304069192532\n",
      "episode 574/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.450e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.630e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.667e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.243e-01  5.030e-02  3.900e-03]]\n",
      "mean_state_value 0.06419890232551809\n",
      "episode 575/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.450e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.630e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.687e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.258e-01  5.070e-02  3.900e-03]]\n",
      "mean_state_value 0.06435437721726531\n",
      "episode 576/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.440e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.630e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.706e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.273e-01  5.110e-02  3.900e-03]]\n",
      "mean_state_value 0.06450996154214544\n",
      "episode 577/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.440e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.620e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.726e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.288e-01  5.140e-02  4.000e-03]]\n",
      "mean_state_value 0.06466557714403137\n",
      "episode 578/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.440e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.620e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.745e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.304e-01  5.180e-02  4.000e-03]]\n",
      "mean_state_value 0.06482140604474312\n",
      "episode 579/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.440e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.620e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.764e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.319e-01  5.220e-02  4.100e-03]]\n",
      "mean_state_value 0.06497768912173378\n",
      "episode 580/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 1 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.440e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.620e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.784e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.334e-01  5.260e-02  4.100e-03]]\n",
      "mean_state_value 0.06513356034087099\n",
      "episode 581/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.440e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.620e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.803e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.349e-01  5.290e-02  4.200e-03]]\n",
      "mean_state_value 0.06528992588237963\n",
      "episode 582/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.440e-02 -6.700e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.620e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.823e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.364e-01  5.330e-02  4.200e-03]]\n",
      "mean_state_value 0.06544636566847095\n",
      "episode 583/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.440e-02 -6.600e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.620e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.842e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.380e-01  5.370e-02  4.300e-03]]\n",
      "mean_state_value 0.06560291947888662\n",
      "episode 584/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 1 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.440e-02 -6.600e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.620e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.862e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.395e-01  5.410e-02  4.300e-03]]\n",
      "mean_state_value 0.06575904212232139\n",
      "episode 585/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.440e-02 -6.600e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.620e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.881e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.410e-01  5.440e-02  4.400e-03]]\n",
      "mean_state_value 0.06591569565948763\n",
      "episode 586/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.440e-02 -6.600e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.620e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.901e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.425e-01  5.480e-02  4.400e-03]]\n",
      "mean_state_value 0.06607242709652307\n",
      "episode 587/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.440e-02 -6.600e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.620e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.920e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.441e-01  5.520e-02  4.500e-03]]\n",
      "mean_state_value 0.06622931802080609\n",
      "episode 588/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.440e-02 -6.600e-03 -3.500e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.620e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.940e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.456e-01  5.560e-02  4.500e-03]]\n",
      "mean_state_value 0.06638618750285222\n",
      "episode 589/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [1 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.440e-02 -6.600e-03 -3.400e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.620e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.959e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.471e-01  5.600e-02  4.600e-03]]\n",
      "mean_state_value 0.06654318163732692\n",
      "episode 590/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.430e-02 -6.600e-03 -3.400e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.610e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.978e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.486e-01  5.630e-02  4.600e-03]]\n",
      "mean_state_value 0.06669988788536917\n",
      "episode 591/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.430e-02 -6.600e-03 -3.400e-03 -1.700e-03 -9.000e-04]\n",
      " [-1.690e-02 -4.500e-03 -1.000e-03 -1.400e-03 -1.400e-03]\n",
      " [-2.610e-02 -1.650e-02  7.890e-02 -1.000e-03 -1.000e-03]\n",
      " [-1.900e-02  1.757e-01  9.998e-01  3.720e-02 -2.000e-04]\n",
      " [-2.110e-02  5.300e-03  4.501e-01  5.670e-02  4.700e-03]]\n",
      "mean_state_value 0.06685700988052899\n",
      "episode 592/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 20\n",
      "[[1 1 1 1 0]\n",
      " [2 0 0 2 0]\n",
      " [2 0 0 2 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.4300e-02 -6.6000e-03 -3.4000e-03 -1.7000e-03 -9.0000e-04]\n",
      " [-1.6900e-02 -4.5000e-03 -1.0000e-03 -1.4000e-03 -1.4000e-03]\n",
      " [-2.6100e-02 -1.6500e-02  7.8900e-02 -1.0000e-03 -1.0000e-03]\n",
      " [-1.9000e-02  1.7570e-01  1.0017e+00  3.7200e-02 -1.0000e-04]\n",
      " [-2.1100e-02  5.3000e-03  4.5170e-01  5.7100e-02  4.7000e-03]]\n",
      "mean_state_value 0.06701377418784929\n",
      "episode 593/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.4300e-02 -6.6000e-03 -3.4000e-03 -1.7000e-03 -9.0000e-04]\n",
      " [-1.6900e-02 -4.5000e-03 -1.0000e-03 -1.4000e-03 -1.4000e-03]\n",
      " [-2.6100e-02 -1.6500e-02  7.8900e-02 -1.0000e-03 -1.0000e-03]\n",
      " [-1.9000e-02  1.7570e-01  1.0037e+00  3.7200e-02 -1.0000e-04]\n",
      " [-2.1100e-02  5.3000e-03  4.5320e-01  5.7500e-02  4.8000e-03]]\n",
      "mean_state_value 0.06717104579156392\n",
      "episode 594/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.4300e-02 -6.6000e-03 -3.4000e-03 -1.7000e-03 -9.0000e-04]\n",
      " [-1.6900e-02 -4.5000e-03 -1.0000e-03 -1.4000e-03 -1.4000e-03]\n",
      " [-2.6100e-02 -1.6500e-02  7.8900e-02 -1.0000e-03 -1.0000e-03]\n",
      " [-1.9000e-02  1.7570e-01  1.0056e+00  3.7200e-02 -1.0000e-04]\n",
      " [-2.1100e-02  5.3000e-03  4.5470e-01  5.7900e-02  4.8000e-03]]\n",
      "mean_state_value 0.06732839964098739\n",
      "episode 595/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.4300e-02 -6.6000e-03 -3.4000e-03 -1.7000e-03 -9.0000e-04]\n",
      " [-1.6900e-02 -4.5000e-03 -1.0000e-03 -1.4000e-03 -1.4000e-03]\n",
      " [-2.6100e-02 -1.6500e-02  7.8900e-02 -1.0000e-03 -1.0000e-03]\n",
      " [-1.9000e-02  1.7570e-01  1.0076e+00  3.7200e-02 -1.0000e-04]\n",
      " [-2.1100e-02  5.3000e-03  4.5630e-01  5.8300e-02  4.9000e-03]]\n",
      "mean_state_value 0.06748577270946562\n",
      "episode 596/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 15\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [1 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.4300e-02 -6.6000e-03 -3.4000e-03 -1.7000e-03 -9.0000e-04]\n",
      " [-1.6900e-02 -4.5000e-03 -1.0000e-03 -1.4000e-03 -1.4000e-03]\n",
      " [-2.6100e-02 -1.6500e-02  7.8900e-02 -1.0000e-03 -1.0000e-03]\n",
      " [-1.9000e-02  1.7570e-01  1.0095e+00  3.7200e-02 -1.0000e-04]\n",
      " [-2.1100e-02  5.3000e-03  4.5780e-01  5.8700e-02  4.9000e-03]]\n",
      "mean_state_value 0.06764327821375395\n",
      "episode 597/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 1]\n",
      " [2 0 0 0 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.4300e-02 -6.6000e-03 -3.4000e-03 -1.7000e-03 -9.0000e-04]\n",
      " [-1.6900e-02 -4.5000e-03 -1.0000e-03 -1.4000e-03 -1.4000e-03]\n",
      " [-2.6100e-02 -1.6500e-02  7.8900e-02 -1.0000e-03 -1.0000e-03]\n",
      " [-1.9000e-02  1.7570e-01  1.0115e+00  3.7200e-02 -1.0000e-04]\n",
      " [-2.1100e-02  5.3000e-03  4.5930e-01  5.9100e-02  5.0000e-03]]\n",
      "mean_state_value 0.06780088614219854\n",
      "episode 598/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 16\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [0 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️⬇️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.4300e-02 -6.6000e-03 -3.4000e-03 -1.7000e-03 -9.0000e-04]\n",
      " [-1.6900e-02 -4.5000e-03 -1.0000e-03 -1.4000e-03 -1.4000e-03]\n",
      " [-2.6100e-02 -1.6500e-02  7.8900e-02 -1.0000e-03 -1.0000e-03]\n",
      " [-1.9000e-02  1.7570e-01  1.0134e+00  3.7200e-02 -1.0000e-04]\n",
      " [-2.1100e-02  5.3000e-03  4.6080e-01  5.9500e-02  5.0000e-03]]\n",
      "mean_state_value 0.06795852763928174\n",
      "episode 599/600\n",
      "p1 0.992 p0 0.002\n",
      "trajectorySteps 17\n",
      "[[1 1 1 1 0]\n",
      " [1 0 0 1 0]\n",
      " [2 0 0 1 1]\n",
      " [1 0 2 0 1]\n",
      " [0 0 1 1 1]]\n",
      "➡️➡️➡️⬇️⬇️\n",
      "⬆️⏫️⏩️➡️⬇️\n",
      "⬆️⬅️⏬➡️⬇️\n",
      "⬆️⏩️✅⏪⬇️\n",
      "⬆️⏩️⬆️⬅️⬅️\n",
      "[[-1.4300e-02 -6.6000e-03 -3.4000e-03 -1.7000e-03 -9.0000e-04]\n",
      " [-1.6900e-02 -4.5000e-03 -1.0000e-03 -1.4000e-03 -1.4000e-03]\n",
      " [-2.6100e-02 -1.6500e-02  7.8900e-02 -1.0000e-03 -1.0000e-03]\n",
      " [-1.9000e-02  1.7570e-01  1.0153e+00  3.7200e-02 -1.0000e-04]\n",
      " [-2.1100e-02  5.3000e-03  4.6240e-01  5.9900e-02  5.1000e-03]]\n",
      "mean_state_value 0.06811587730493869\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<GridWorld_v2.GridWorld_v2 at 0x2081c608470>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Expected_SARSA(gridworld) # Works very well: the result is not only locally optimal but also globally optimal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6253a6e8-4f14-46ea-bcc8-1853258a9d69",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
